Hi,
I wrote a patch for the following items from todo.txt:
> Have an option for spell checking to not mark any Chinese, Japanese or other
> double-width characters as error. Or perhaps all characters above 256.
> (Bill Sun) Helps a lot for mixed Asian and latin text.
> - have some way not to give spelling errors for a range of characters.
> E.g. for Chinese and other languages with specific characters for which we
> don't have a spell file. Useful when there is also text in other
> languages in the file.
When I write mixed Japanese and English text, the spelling errors flagged on
the Japanese characters really annoy me.
Vim's current spell checking algorithm doesn't support Chinese, Japanese or
other East Asian languages, so this patch simply excludes these characters
from spell checking. (It does not add a new option.)
Please check the attached patch.
Regards,
Ken Takata
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.
# HG changeset patch
# Parent f7b74dbc56302abc7b7d3f470fa87a4800ada1bf
diff --git a/src/mbyte.c b/src/mbyte.c
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -2490,9 +2490,9 @@
/* sorted list of non-overlapping intervals */
static struct clinterval
{
- unsigned short first;
- unsigned short last;
- unsigned short class;
+ unsigned int first;
+ unsigned int last;
+ unsigned int class;
} classes[] =
{
{0x037e, 0x037e, 1}, /* Greek question mark */
@@ -2557,6 +2557,10 @@
{0xff1a, 0xff20, 1}, /* half/fullwidth ASCII */
{0xff3b, 0xff40, 1}, /* half/fullwidth ASCII */
{0xff5b, 0xff65, 1}, /* half/fullwidth ASCII */
+ {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */
+ {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */
+ {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */
+ {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */
};
int bot = 0;
int top = sizeof(classes) / sizeof(struct clinterval) - 1;
@@ -2576,9 +2580,9 @@
while (top >= bot)
{
mid = (bot + top) / 2;
- if (classes[mid].last < c)
+ if (classes[mid].last < (unsigned int)c)
bot = mid + 1;
- else if (classes[mid].first > c)
+ else if (classes[mid].first > (unsigned int)c)
top = mid - 1;
else
return (int)classes[mid].class;
diff --git a/src/spell.c b/src/spell.c
--- a/src/spell.c
+++ b/src/spell.c
@@ -9942,12 +9942,16 @@
* Return TRUE if word class indicates a word character.
* Only for characters above 255.
* Unicode subscript and superscript are not considered word characters.
+ * East Asian characters are not considered word characters, because spell
+ * checking for those characters is not currently supported.
+ * See also dbcs_class() and utf_class() in mbyte.c.
*/
static int
spell_mb_isword_class(cl)
int cl;
{
- return cl >= 2 && cl != 0x2070 && cl != 0x2080;
+ /* return cl >= 2 && cl != 0x2070 && cl != 0x2080; */
+ return cl == 2;
}
/*