[patch] exclude East Asian characters from spell checking

Ken Takata Mon, 07 Oct 2013 05:03:14 -0700

Hi,

I wrote a patch for the following items from todo.txt:


> Have an option for spell checking to not mark any Chinese, Japanese or other
> double-width characters as error.  Or perhaps all characters above 256.
> (Bill Sun)  Helps a lot for mixed Asian and latin text.

> -   have some way not to give spelling errors for a range of characters.
>     E.g. for Chinese and other languages with specific characters for which we
>     don't have a spell file.  Useful when there is also text in other
>     languages in the file.

When I write mixed Japanese and English text, the spell checker marks the
Japanese text as misspelled, which is really annoying.
Vim's current spell checking algorithm doesn't support Chinese, Japanese or
other East Asian languages, so this patch simply excludes these characters
from spell checking. (No new option is added.)
Please check the attached patch.

Regards,
Ken Takata

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to vim_dev+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.

# HG changeset patch
# Parent f7b74dbc56302abc7b7d3f470fa87a4800ada1bf

diff --git a/src/mbyte.c b/src/mbyte.c
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -2490,9 +2490,9 @@
     /* sorted list of non-overlapping intervals */
     static struct clinterval
     {
-	unsigned short first;
-	unsigned short last;
-	unsigned short class;
+	unsigned int first;
+	unsigned int last;
+	unsigned int class;
     } classes[] =
     {
 	{0x037e, 0x037e, 1},		/* Greek question mark */
@@ -2557,6 +2557,10 @@
 	{0xff1a, 0xff20, 1},		/* half/fullwidth ASCII */
 	{0xff3b, 0xff40, 1},		/* half/fullwidth ASCII */
 	{0xff5b, 0xff65, 1},		/* half/fullwidth ASCII */
+	{0x20000, 0x2a6df, 0x4e00},	/* CJK Ideographs */
+	{0x2a700, 0x2b73f, 0x4e00},	/* CJK Ideographs */
+	{0x2b740, 0x2b81f, 0x4e00},	/* CJK Ideographs */
+	{0x2f800, 0x2fa1f, 0x4e00},	/* CJK Ideographs */
     };
     int bot = 0;
     int top = sizeof(classes) / sizeof(struct clinterval) - 1;
@@ -2576,9 +2580,9 @@
     while (top >= bot)
     {
 	mid = (bot + top) / 2;
-	if (classes[mid].last < c)
+	if (classes[mid].last < (unsigned int)c)
 	    bot = mid + 1;
-	else if (classes[mid].first > c)
+	else if (classes[mid].first > (unsigned int)c)
 	    top = mid - 1;
 	else
 	    return (int)classes[mid].class;
diff --git a/src/spell.c b/src/spell.c
--- a/src/spell.c
+++ b/src/spell.c
@@ -9942,12 +9942,16 @@
  * Return TRUE if word class indicates a word character.
  * Only for characters above 255.
  * Unicode subscript and superscript are not considered word characters.
+ * East Asian characters are not considered word characters, because spell
+ * checking for those characters is not currently supported.
+ * See also dbcs_class() and utf_class() in mbyte.c.
  */
     static int
 spell_mb_isword_class(cl)
     int cl;
 {
-    return cl >= 2 && cl != 0x2070 && cl != 0x2080;
+    /* return cl >= 2 && cl != 0x2070 && cl != 0x2080; */
+    return cl == 2;
 }
 
 /*

[patch] exclude East Asian characters from spell checking

Raspunde prin e-mail lui