Re: Patch 7.4.088

John Marriott Wed, 13 Nov 2013 11:38:43 -0800

On 12-Nov-2013 2:44 PM, Bram Moolenaar wrote:

Patch 7.4.088
Problem:    When spell checking is enabled Asian characters are always marked
             as error.
Solution:   When 'spelllang' contains "cjk" do not mark Asian characters as
             error. (Ken Takata)
Files:      runtime/doc/options.txt, runtime/doc/spell.txt, src/mbyte.c,
             src/option.c, src/spell.c, src/structs.h



*** ../vim-7.4.087/runtime/doc/options.txt      2013-11-06 05:26:08.000000000 +0100
--- runtime/doc/options.txt     2013-11-12 04:00:51.000000000 +0100
***************
*** 6555,6560 ****
--- 6555,6563 ----
        region by listing them: "en_us,en_ca" supports both US and Canadian
        English, but not words specific for Australia, New Zealand or Great
        Britain.
+       If the name "cjk" is included East Asian characters are excluded from
+       spell checking.  This is useful when editing text that also has Asian
+       words.
                                                        *E757*
        As a special case the name of a .spl file can be given as-is.  The
        first "_xx" in the name is removed and used as the region name
*** ../vim-7.4.087/runtime/doc/spell.txt        2013-08-10 13:25:01.000000000 +0200
--- runtime/doc/spell.txt       2013-11-12 04:02:27.000000000 +0100
***************
*** 269,274 ****
--- 269,281 ----
        latin1          yi              transliterated Yiddish
        utf-8           yi-tr           transliterated Yiddish

+ *spell-cjk*

+ Chinese, Japanese and other East Asian characters are normally marked as
+ errors, because spell checking of these characters is not supported. If
+ 'spelllang' includes "cjk", these characters are not marked as errors.  This
+ is useful when editing text with spell checking while some Asian words are
+ present.
+

  SPELL FILES                                             *spell-load*

*** ../vim-7.4.087/src/mbyte.c 2013-07-05 20:07:21.000000000 +0200
--- src/mbyte.c 2013-11-12 03:55:50.000000000 +0100
***************
*** 947,954 ****
                {
                    case 0x2121: /* ZENKAKU space */
                        return 0;
!                   case 0x2122: /* KU-TEN (Japanese comma) */
!                   case 0x2123: /* TOU-TEN (Japanese period) */
                    case 0x2124: /* ZENKAKU comma */
                    case 0x2125: /* ZENKAKU period */
                        return 1;
--- 947,954 ----
                {
                    case 0x2121: /* ZENKAKU space */
                        return 0;
!                   case 0x2122: /* TOU-TEN (Japanese comma) */
!                   case 0x2123: /* KU-TEN (Japanese period) */
                    case 0x2124: /* ZENKAKU comma */
                    case 0x2125: /* ZENKAKU period */
                        return 1;
***************
*** 2477,2485 ****
       /* sorted list of non-overlapping intervals */
       static struct clinterval
       {
!       unsigned short first;
!       unsigned short last;
!       unsigned short class;
       } classes[] =
       {
        {0x037e, 0x037e, 1},            /* Greek question mark */
--- 2477,2485 ----
       /* sorted list of non-overlapping intervals */
       static struct clinterval
       {
!       unsigned int first;
!       unsigned int last;
!       unsigned int class;
       } classes[] =
       {
        {0x037e, 0x037e, 1},            /* Greek question mark */
***************
*** 2544,2549 ****
--- 2544,2553 ----
        {0xff1a, 0xff20, 1},            /* half/fullwidth ASCII */
        {0xff3b, 0xff40, 1},            /* half/fullwidth ASCII */
        {0xff5b, 0xff65, 1},            /* half/fullwidth ASCII */
+       {0x20000, 0x2a6df, 0x4e00},     /* CJK Ideographs */
+       {0x2a700, 0x2b73f, 0x4e00},     /* CJK Ideographs */
+       {0x2b740, 0x2b81f, 0x4e00},     /* CJK Ideographs */
+       {0x2f800, 0x2fa1f, 0x4e00},     /* CJK Ideographs */
       };
       int bot = 0;
       int top = sizeof(classes) / sizeof(struct clinterval) - 1;
***************
*** 2563,2571 ****
       while (top >= bot)
       {
        mid = (bot + top) / 2;
!       if (classes[mid].last < c)
            bot = mid + 1;
!       else if (classes[mid].first > c)
            top = mid - 1;
        else
            return (int)classes[mid].class;
--- 2567,2575 ----
       while (top >= bot)
       {
        mid = (bot + top) / 2;
!       if (classes[mid].last < (unsigned int)c)
            bot = mid + 1;
!       else if (classes[mid].first > (unsigned int)c)
            top = mid - 1;
        else
            return (int)classes[mid].class;
*** ../vim-7.4.087/src/option.c 2013-11-08 04:30:06.000000000 +0100
--- src/option.c        2013-11-12 04:34:46.000000000 +0100
***************
*** 7122,7127 ****
--- 7122,7132 ----
        if (varp == &(curwin->w_s->b_p_spl))
        {
            char_u      fname[200];
+           char_u      *q = curwin->w_s->b_p_spl;
+
+           /* Skip the first name if it is "cjk". */
+           if (STRNCMP(q, "cjk,", 4) == 0)
+               q += 4;

            /*
             * Source the spell/LANG.vim in 'runtimepath'.
***************
*** 7129,7139 ****
             * Use the first name in 'spelllang' up to '_region' or
             * '.encoding'.
             */
!           for (p = curwin->w_s->b_p_spl; *p != NUL; ++p)
                if (vim_strchr((char_u *)"_.,", *p) != NULL)
                    break;
!           vim_snprintf((char *)fname, 200, "spell/%.*s.vim",
!                                (int)(p - curwin->w_s->b_p_spl), curwin->w_s->b_p_spl);
            source_runtime(fname, TRUE);
        }
   #endif
--- 7134,7143 ----
             * Use the first name in 'spelllang' up to '_region' or
             * '.encoding'.
             */
!           for (p = q; *p != NUL; ++p)
                if (vim_strchr((char_u *)"_.,", *p) != NULL)
                    break;
!           vim_snprintf((char *)fname, 200, "spell/%.*s.vim", (int)(p - q), q);
            source_runtime(fname, TRUE);
        }
   #endif
*** ../vim-7.4.087/src/spell.c  2013-09-29 13:38:25.000000000 +0200
--- src/spell.c 2013-11-12 04:37:33.000000000 +0100
***************
*** 754,762 ****
   static void clear_spell_chartab __ARGS((spelltab_T *sp));
   static int set_spell_finish __ARGS((spelltab_T       *new_st));
   static int spell_iswordp __ARGS((char_u *p, win_T *wp));
! static int spell_iswordp_nmw __ARGS((char_u *p));
   #ifdef FEAT_MBYTE
! static int spell_mb_isword_class __ARGS((int cl));
   static int spell_iswordp_w __ARGS((int *p, win_T *wp));
   #endif
   static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
--- 754,762 ----
   static void clear_spell_chartab __ARGS((spelltab_T *sp));
   static int set_spell_finish __ARGS((spelltab_T       *new_st));
   static int spell_iswordp __ARGS((char_u *p, win_T *wp));
! static int spell_iswordp_nmw __ARGS((char_u *p, win_T *wp));
   #ifdef FEAT_MBYTE
! static int spell_mb_isword_class __ARGS((int cl, win_T *wp));
   static int spell_iswordp_w __ARGS((int *p, win_T *wp));
   #endif
   static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
***************
*** 1149,1155 ****

        /* When we are at a non-word character there is no error, just
         * skip over the character (try looking for a word after it). */
!       else if (!spell_iswordp_nmw(ptr))
        {
            if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
            {
--- 1149,1155 ----

        /* When we are at a non-word character there is no error, just
         * skip over the character (try looking for a word after it). */
!       else if (!spell_iswordp_nmw(ptr, wp))
        {
            if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
            {
***************
*** 1561,1567 ****
                         * accept a no-caps word, even when the dictionary
                         * word specifies ONECAP. */
                        mb_ptr_back(mip->mi_word, p);
!                       if (spell_iswordp_nmw(p)
                                ? capflags == WF_ONECAP
                                : (flags & WF_ONECAP) != 0
                                                     && capflags != WF_ONECAP)
--- 1561,1567 ----
                         * accept a no-caps word, even when the dictionary
                         * word specifies ONECAP. */
                        mb_ptr_back(mip->mi_word, p);
!                       if (spell_iswordp_nmw(p, mip->mi_win)
                                ? capflags == WF_ONECAP
                                : (flags & WF_ONECAP) != 0
                                                     && capflags != WF_ONECAP)
***************
*** 4234,4240 ****
       if (spl_copy == NULL)
        goto theend;

!     /* loop over comma separated language names. */
       for (splp = spl_copy; *splp != NUL; )
       {
        /* Get one language name. */
--- 4234,4242 ----
       if (spl_copy == NULL)
        goto theend;

!     wp->w_s->b_cjk = 0;
!
!     /* Loop over comma separated language names. */
       for (splp = spl_copy; *splp != NUL; )
       {
        /* Get one language name. */
***************
*** 4242,4247 ****
--- 4244,4255 ----
        region = NULL;
        len = (int)STRLEN(lang);

+       if (STRCMP(lang, "cjk") == 0)
+       {
+           wp->w_s->b_cjk = 1;
+           continue;
+       }
+
        /* If the name ends in ".spl" use it as the name of the spell file.
         * If there is a region name let "region" point to it and remove it
         * from the name. */
***************
*** 4601,4607 ****
       int              past_second = FALSE;    /* past second word char */

/* find first letter */

!     for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
        if (end == NULL ? *p == NUL : p >= end)
            return 0;       /* only non-word characters, illegal word */
   #ifdef FEAT_MBYTE
--- 4609,4615 ----
       int              past_second = FALSE;    /* past second word char */

/* find first letter */

!     for (p = word; !spell_iswordp_nmw(p, curwin); mb_ptr_adv(p))
        if (end == NULL ? *p == NUL : p >= end)
            return 0;       /* only non-word characters, illegal word */
   #ifdef FEAT_MBYTE
***************
*** 4617,4623 ****
        * But a word with an upper char only at start is a ONECAP.
        */
       for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
!       if (spell_iswordp_nmw(p))
        {
            c = PTR2CHAR(p);
            if (!SPELL_ISUPPER(c))
--- 4625,4631 ----
        * But a word with an upper char only at start is a ONECAP.
        */
       for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
!       if (spell_iswordp_nmw(p, curwin))
        {
            c = PTR2CHAR(p);
            if (!SPELL_ISUPPER(c))
***************
*** 9907,9913 ****

c = mb_ptr2char(s);

        if (c > 255)
!           return spell_mb_isword_class(mb_get_class(s));
        return spelltab.st_isw[c];
       }
   #endif
--- 9915,9921 ----

c = mb_ptr2char(s);

        if (c > 255)
!           return spell_mb_isword_class(mb_get_class(s), wp);
        return spelltab.st_isw[c];
       }
   #endif
***************
*** 9920,9927 ****
    * Unlike spell_iswordp() this doesn't check for "midword" characters.
    */
       static int
! spell_iswordp_nmw(p)
       char_u   *p;
   {
   #ifdef FEAT_MBYTE
       int              c;
--- 9928,9936 ----
    * Unlike spell_iswordp() this doesn't check for "midword" characters.
    */
       static int
! spell_iswordp_nmw(p, wp)
       char_u   *p;
+     win_T     *wp;
   {
   #ifdef FEAT_MBYTE
       int              c;
***************
*** 9930,9936 ****
       {
        c = mb_ptr2char(p);
        if (c > 255)
!           return spell_mb_isword_class(mb_get_class(p));
        return spelltab.st_isw[c];
       }
   #endif
--- 9939,9945 ----
       {
        c = mb_ptr2char(p);
        if (c > 255)
!           return spell_mb_isword_class(mb_get_class(p), wp);
        return spelltab.st_isw[c];
       }
   #endif
***************
*** 9942,9952 ****
    * Return TRUE if word class indicates a word character.
    * Only for characters above 255.
    * Unicode subscript and superscript are not considered word characters.
    */
       static int
! spell_mb_isword_class(cl)
!     int cl;
   {
       return cl >= 2 && cl != 0x2070 && cl != 0x2080;
   }

--- 9951,9966 ----

    * Return TRUE if word class indicates a word character.
    * Only for characters above 255.
    * Unicode subscript and superscript are not considered word characters.
+  * See also dbcs_class() and utf_class() in mbyte.c.
    */
       static int
! spell_mb_isword_class(cl, wp)
!     int               cl;
!     win_T     *wp;
   {
+     if (wp->w_s->b_cjk)
+       /* East Asian characters are not considered word characters. */
+       return cl == 2 || cl == 0x2800;
       return cl >= 2 && cl != 0x2070 && cl != 0x2080;
   }

***************

*** 9971,9979 ****
       if (*s > 255)
       {
        if (enc_utf8)
!           return spell_mb_isword_class(utf_class(*s));
        if (enc_dbcs)
!           return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
        return 0;
       }
       return spelltab.st_isw[*s];
--- 9985,9994 ----
       if (*s > 255)
       {
        if (enc_utf8)
!           return spell_mb_isword_class(utf_class(*s), wp);
        if (enc_dbcs)
!           return spell_mb_isword_class(
!                               dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
        return 0;
       }
       return spelltab.st_isw[*s];
***************
*** 10193,10205 ****
        line = ml_get_curline();
        p = line + curwin->w_cursor.col;
        /* Backup to before start of word. */
!       while (p > line && spell_iswordp_nmw(p))
            mb_ptr_back(line, p);
        /* Forward to start of word. */
!       while (*p != NUL && !spell_iswordp_nmw(p))
            mb_ptr_adv(p);

!       if (!spell_iswordp_nmw(p))          /* No word found. */
        {
            beep_flush();
            return;
--- 10208,10220 ----
        line = ml_get_curline();
        p = line + curwin->w_cursor.col;
        /* Backup to before start of word. */
!       while (p > line && spell_iswordp_nmw(p, curwin))
            mb_ptr_back(line, p);
        /* Forward to start of word. */
!       while (*p != NUL && !spell_iswordp_nmw(p, curwin))
            mb_ptr_adv(p);

!       if (!spell_iswordp_nmw(p, curwin))  /* No word found. */
        {
            beep_flush();
            return;
***************
*** 10436,10442 ****
        for (;;)
        {
            mb_ptr_back(line, p);
!           if (p == line || spell_iswordp_nmw(p))
                break;
            if (vim_regexec(&regmatch, p, 0)
                                         && regmatch.endp[0] == line + endcol)
--- 10451,10457 ----
        for (;;)
        {
            mb_ptr_back(line, p);
!           if (p == line || spell_iswordp_nmw(p, curwin))
                break;
            if (vim_regexec(&regmatch, p, 0)
                                         && regmatch.endp[0] == line + endcol)
***************
*** 11645,11651 ****

                /* When appending a compound word after a word character don't
                 * use Onecap. */
!               if (p != NULL && spell_iswordp_nmw(p))
                    c &= ~WF_ONECAP;
                make_case_word(tword + sp->ts_splitoff,
                                              preword + sp->ts_prewordlen, c);
--- 11660,11666 ----

                /* When appending a compound word after a word character don't
                 * use Onecap. */
!               if (p != NULL && spell_iswordp_nmw(p, curwin))
                    c &= ~WF_ONECAP;
                make_case_word(tword + sp->ts_splitoff,
                                              preword + sp->ts_prewordlen, c);
***************
*** 11895,11901 ****
                         * character when the word ends.  But only when the
                         * good word can end. */
                        if (((!try_compound && !spell_iswordp_nmw(fword
!                                                              + sp->ts_fidx))
                                    || fword_ends)
                                && fword[sp->ts_fidx] != NUL
                                && goodword_ends)
--- 11910,11917 ----
                         * character when the word ends.  But only when the
                         * good word can end. */
                        if (((!try_compound && !spell_iswordp_nmw(fword
!                                                              + sp->ts_fidx,
!                                                              curwin))
                                    || fword_ends)
                                && fword[sp->ts_fidx] != NUL
                                && goodword_ends)
***************
*** 14226,14232 ****
            }
            else
            {
!               if (spell_iswordp_nmw(s))
                    *t++ = *s;
                ++s;
            }
--- 14242,14248 ----
            }
            else
            {
!               if (spell_iswordp_nmw(s, curwin))
                    *t++ = *s;
                ++s;
            }
***************
*** 14521,14527 ****
            else
            {
                did_white = FALSE;
!               if (!spell_iswordp_nmw(t))
                    continue;
            }
        }
--- 14537,14543 ----
            else
            {
                did_white = FALSE;
!               if (!spell_iswordp_nmw(t, curwin))
                    continue;
            }
        }
***************
*** 16045,16051 ****
       for (p = line + startcol; p > line; )
       {
        mb_ptr_back(line, p);
!       if (spell_iswordp_nmw(p))
            break;
       }

--- 16061,16067 ----

       for (p = line + startcol; p > line; )
       {
        mb_ptr_back(line, p);
!       if (spell_iswordp_nmw(p, curwin))
            break;
       }

*** ../vim-7.4.087/src/structs.h 2013-11-09 05:30:18.000000000 +0100

--- src/structs.h       2013-11-12 03:55:50.000000000 +0100
***************
*** 1310,1315 ****
--- 1310,1318 ----
       regprog_T        *b_cap_prog;    /* program for 'spellcapcheck' */
       char_u   *b_p_spf;       /* 'spellfile' */
       char_u   *b_p_spl;       /* 'spelllang' */
+ # ifdef FEAT_MBYTE
+     int               b_cjk;          /* all CJK letters as OK */
+ # endif
   #endif
   #if !defined(FEAT_SYN_HL) && !defined(FEAT_SPELL)
       int              dummy;
*** ../vim-7.4.087/src/version.c        2013-11-11 23:17:31.000000000 +0100
--- src/version.c       2013-11-12 03:59:03.000000000 +0100
***************
*** 740,741 ****
--- 740,743 ----
   {   /* Add new patch number below this line */
+ /**/
+     88,
   /**/

Hi All,

This patch fails to build (on HP-UX in my case) if FEAT_MBYTE is not defined, like so:

cc -c -I. -Iproto -DHAVE_CONFIG_H     -O2       -o objects/spell.o spell.c
cc: "spell.c", line 4237: error 1588: "b_cjk" undefined.
cc: "spell.c", line 4237: error 1531: Invalid member of struct or union.
cc: "spell.c", line 4249: error 1531: Invalid member of struct or union.
*** Error exit code 1

I haven't had time to make a patch sorry.

Cheers
John

--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

---
You received this message because you are subscribed to the Google Groups "vim_dev" group.

To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Re: Patch 7.4.088

Raspunde prin e-mail lui