Package: jless Version: 382-iso262-2 Severity: wishlist Tags: patch Please apply the attached patches, which add UTF-8 support to jless.
less-382-iso262-utf8.dpatch: jless-utf8.patch; less-382-iso262-utf8-2.dpatch: bug fix (assertion failure with a binary file). Original: http://nijino.homelinux.net/diary/200605.shtml http://nijino.homelinux.net/tmp/jless-utf8.patch ITO Keisuke
#! /bin/sh /usr/share/dpatch/dpatch-run ## less-382-iso262-utf8.dpatch ## ## All lines beginning with `## DP:' are a description of the patch. ## DP: No description. @DPATCH@ diff -urNad jless-382-iso262~/charset.c jless-382-iso262/charset.c --- jless-382-iso262~/charset.c 2010-07-04 12:43:53.000000000 +0900 +++ jless-382-iso262/charset.c 2010-07-04 12:44:58.000000000 +0900 @@ -130,6 +130,11 @@ /* recoginize all JIS except supplement */ SCSASCII | SCSALLSJIS, ESISO7, ESALLJA, ESSJIS }, +#ifdef ARI_UTF8 + { "japanese-utf-8", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSALLJIS, + ESISO7, ESALLJA, ESUTF8 }, +#endif /* read all KANJI before 1983 */ { "japanese83-iso7", NULL, "8bcccb4c11bc4b95.b127.b", diff -urNad jless-382-iso262~/multi.c jless-382-iso262/multi.c --- jless-382-iso262~/multi.c 2010-07-04 12:43:53.000000000 +0900 +++ jless-382-iso262/multi.c 2010-07-04 12:44:58.000000000 +0900 @@ -753,6 +753,8 @@ mp->startpos = mp->lastpos + 1; return 1; } +#ifdef ARI_UTF8 +#else } else if (ISUJISKANJI(c0, c1)) { if (mp->io.scs & SCSJISX0213_2004) { mp->icharset = UJIS2004; @@ -785,6 +787,7 @@ } /* data are recognized as kanji or wrong data, so return 1 */ return 1; +#endif } else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1)) { /* do nothing. 
return 1 to get next byte */ return 1; @@ -1087,28 +1090,28 @@ return; } } - if (mp->io.inputr & ESUJIS) { + if (mp->io.inputr & ESUTF8) { if (internalize_utf8(mp)) { mp->priority = PUTF8; return; } } } else if (mp->lastpos - mp->startpos + 1 == 4) { - if (mp->io.inputr & ESUJIS) { + if (mp->io.inputr & ESUTF8) { if (internalize_utf8(mp)) { mp->priority = PUTF8; return; } } } else if (mp->lastpos - mp->startpos + 1 == 5) { - if (mp->io.inputr & ESUJIS) { + if (mp->io.inputr & ESUTF8) { if (internalize_utf8(mp)) { mp->priority = PUTF8; return; } } } else if (mp->lastpos - mp->startpos + 1 == 6) { - if (mp->io.inputr & ESUJIS) { + if (mp->io.inputr & ESUTF8) { if (internalize_utf8(mp)) { mp->priority = PUTF8; return; @@ -2276,6 +2279,36 @@ } #endif +#ifdef ARI_UTF8 +#include <iconv.h> +#ifndef ICONV_EUC_JP +#define ICONV_EUC_JP ("eucJP") +#endif +//#ifndef ICONV_SHIFT_JIS +//#define ICONV_SHIFT_JIS ("ms932") +//#endif + +static int iconv_to_utf8(const char* fromcode) +{ + iconv_t cd = iconv_open("UTF-8", fromcode); + assert(cd != (iconv_t)(-1)); + + size_t inbytesleft = strlen(cvbuffer); + char outbuf[4]; + size_t outbytesleft = 4; + char* inptr = cvbuffer; + char* outptr = outbuf; + size_t ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft); + iconv_close(cd); + if (ret == (iconv_t)(-1)) + { + return FALSE; + } + assert(inbytesleft == 0); + strncpy(cvbuffer, outbuf, 4 - outbytesleft); + return TRUE; +} +#endif static char *convert_to_utf8(c, cs) int c; int cs; @@ -2296,12 +2329,46 @@ cs = CS2CHARSET(cs); +#ifndef ARI_UTF8 assert(0); +#endif if (cs == ASCII || cs == JISX0201ROMAN) { assert(cvindex == 1); cvindex = 0; return (cvbuffer); +#ifdef ARI_UTF8 + } else if (cs == UTF8) + { + cvindex = 0; + return (cvbuffer); +#ifdef ICONV_SHIFT_JIS + } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI || + cs == JISX0208_90KANJI || cs == JISX0213KANJI1 || + cs == JISX02132004KANJI1) + { + cvindex--; + if (convert_to_sjis(c, cs) == nullcvbuffer) + return 
(nullcvbuffer); + iconv_to_utf8(ICONV_SHIFT_JIS); + return (cvbuffer); + } else if (cs == JISX0201KANA || cs == JISX02132004KANJI2 || + cs == JISX0212KANJISUP) +#else + } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI || + cs == JISX0208_90KANJI || cs == JISX0213KANJI1 || + cs == JISX02132004KANJI1 || + cs == JISX0201KANA || cs == JISX02132004KANJI2 || + cs == JISX0212KANJISUP) +#endif + { + cvindex--; + if (convert_to_ujis(c, cs) == nullcvbuffer) + return (nullcvbuffer); + iconv_to_utf8(ICONV_EUC_JP); + return (cvbuffer); + } +#else } else if (cs == JISX0201KANA) { assert(cvindex == 1); @@ -2324,6 +2391,7 @@ cvindex = 0; return (nullcvbuffer); } +#endif assert(0); cvindex = 0; return (cvbuffer);
#! /bin/sh /usr/share/dpatch/dpatch-run ## less-382-iso262-utf8-2.dpatch ## ## All lines beginning with `## DP:' are a description of the patch. ## DP: No description. @DPATCH@ diff -urNad jless-382-iso262~/charset.c jless-382-iso262/charset.c --- jless-382-iso262~/charset.c 2010-07-18 01:28:13.000000000 +0900 +++ jless-382-iso262/charset.c 2010-07-18 01:28:31.000000000 +0900 @@ -130,11 +130,9 @@ /* recoginize all JIS except supplement */ SCSASCII | SCSALLSJIS, ESISO7, ESALLJA, ESSJIS }, -#ifdef ARI_UTF8 { "japanese-utf-8", NULL, "8bcccb4c11bc4b95.b127.b", SCSASCII | SCSALLJIS, ESISO7, ESALLJA, ESUTF8 }, -#endif /* read all KANJI before 1983 */ { "japanese83-iso7", NULL, "8bcccb4c11bc4b95.b127.b", diff -urNad jless-382-iso262~/multi.c jless-382-iso262/multi.c --- jless-382-iso262~/multi.c 2010-07-18 01:28:13.000000000 +0900 +++ jless-382-iso262/multi.c 2010-07-18 01:30:12.000000000 +0900 @@ -753,41 +753,6 @@ mp->startpos = mp->lastpos + 1; return 1; } -#ifdef ARI_UTF8 -#else - } else if (ISUJISKANJI(c0, c1)) { - if (mp->io.scs & SCSJISX0213_2004) { - mp->icharset = UJIS2004; - mp->cs = JISX02132004KANJI1; - } else if (mp->io.scs & SCSJISX0213_2000) { - mp->icharset = UJIS2000; - mp->cs = JISX0213KANJI1; - } else { - mp->icharset = UJIS; - mp->cs = JISX0208KANJI; - } - mp->multiint[mp->intindex] = c0; - mp->multics[mp->intindex] = mp->icharset; - mp->multiint[mp->intindex + 1] = c1; - mp->multics[mp->intindex + 1] = REST_MASK | mp->icharset; - - /* Check character whether it has defined glyph or not */ - if (chisvalid_cs(&mp->multiint[mp->intindex], - &mp->multics[mp->intindex])) { - /* defined */ - mp->multiint[mp->intindex] = c0 & 0x7f; - mp->multics[mp->intindex] = mp->cs; - mp->multiint[mp->intindex + 1] = c1 & 0x7f; - mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; - mp->intindex += 2; - mp->startpos = mp->lastpos + 1; - } else { - /* undefined. 
less ignore them */ - wrongchar(mp); - } - /* data are recognized as kanji or wrong data, so return 1 */ - return 1; -#endif } else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1)) { /* do nothing. return 1 to get next byte */ return 1; @@ -2279,14 +2244,13 @@ } #endif -#ifdef ARI_UTF8 #include <iconv.h> #ifndef ICONV_EUC_JP -#define ICONV_EUC_JP ("eucJP") +#define ICONV_EUC_JP ("EUCJP-MS") +#endif +#ifndef ICONV_SHIFT_JIS +#define ICONV_SHIFT_JIS ("ms932") #endif -//#ifndef ICONV_SHIFT_JIS -//#define ICONV_SHIFT_JIS ("ms932") -//#endif static int iconv_to_utf8(const char* fromcode) { @@ -2308,7 +2272,6 @@ strncpy(cvbuffer, outbuf, 4 - outbytesleft); return TRUE; } -#endif static char *convert_to_utf8(c, cs) int c; int cs; @@ -2329,20 +2292,15 @@ cs = CS2CHARSET(cs); -#ifndef ARI_UTF8 - assert(0); -#endif if (cs == ASCII || cs == JISX0201ROMAN) { assert(cvindex == 1); cvindex = 0; return (cvbuffer); -#ifdef ARI_UTF8 } else if (cs == UTF8) { cvindex = 0; return (cvbuffer); -#ifdef ICONV_SHIFT_JIS } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI || cs == JISX0208_90KANJI || cs == JISX0213KANJI1 || cs == JISX02132004KANJI1) @@ -2354,13 +2312,6 @@ return (cvbuffer); } else if (cs == JISX0201KANA || cs == JISX02132004KANJI2 || cs == JISX0212KANJISUP) -#else - } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI || - cs == JISX0208_90KANJI || cs == JISX0213KANJI1 || - cs == JISX02132004KANJI1 || - cs == JISX0201KANA || cs == JISX02132004KANJI2 || - cs == JISX0212KANJISUP) -#endif { cvindex--; if (convert_to_ujis(c, cs) == nullcvbuffer) @@ -2368,31 +2319,6 @@ iconv_to_utf8(ICONV_EUC_JP); return (cvbuffer); } -#else - } else if (cs == JISX0201KANA) - { - assert(cvindex == 1); - cvbuffer[0] |= 0x80; - cvindex = 0; - return (cvbuffer); - } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI || - cs == JISX0208_90KANJI || cs == JISX0213KANJI1 || - cs == JISX02132004KANJI1) - { - cvindex = 0; - return (cvbuffer); - } else if (cs == JISX0213KANJI2) - { - cvindex = 0; - 
return (cvbuffer); - } else if (cs == UTF8) - { - /* ? */ - cvindex = 0; - return (nullcvbuffer); - } -#endif - assert(0); cvindex = 0; return (cvbuffer); }