I noticed page breaking issue in my patch. I've fixed it.
--- texinfo.tex.org 2016-01-21 23:04:22.405562200 +0900 +++ texinfo.tex 2016-01-28 22:23:50.283561700 +0900 @@ -9433,43 +9433,68 @@ \global\righthyphenmin = #3\relax } -% Get input by bytes instead of by UTF-8 codepoints for XeTeX and LuaTeX, -% otherwise the encoding support is completely broken. +% XeTeX and LuaTeX can handle native Unicode. +% Their default I/O is UTF-8 sequence instead of byte-wise. +% Other TeX engine (pdfTeX etc.) I/O is byte-wise. +% +\newif\iftxinativeunicodecapable +\newif\iftxiusebytewiseio + \ifx\XeTeXrevision\thisisundefined + \ifx\luatexversion\thisisundefined + \txinativeunicodecapablefalse + \txiusebytewiseiotrue + \else + \txinativeunicodecapabletrue + \txiusebytewiseiofalse + \fi \else -\XeTeXdefaultencoding "bytes" % For subsequent files to be read -\XeTeXinputencoding "bytes" % Effective in texinfo.tex only -% Unfortunately, there seems to be no corresponding XeTeX command for -% output encoding. This is a problem for auxiliary index and TOC files. -% The only solution would be perhaps to write out @U{...} sequences in -% place of UTF-8 characters. + \txinativeunicodecapabletrue + \txiusebytewiseiofalse \fi -\ifx\luatexversion\thisisundefined -\else -\directlua{ -local utf8_char, byte, gsub = unicode.utf8.char, string.byte, string.gsub -local function convert_char (char) - return utf8_char(byte(char)) -end - -local function convert_line (line) - return gsub(line, ".", convert_char) -end - -callback.register("process_input_buffer", convert_line) - -local function convert_line_out (line) - local line_out = "" - for c in string.utfvalues(line) do - line_out = line_out .. string.char(c) - end - return line_out -end +% Set I/O by bytes instead of UTF-8 sequence for XeTeX and LuaTex +% for non-UTF-8 (byte-wise) encodings. +% +\def\setbytewiseio{% + \ifx\XeTeXrevision\thisisundefined + \else + \XeTeXdefaultencoding "bytes" % For subsequent files to be read + \XeTeXinputencoding "bytes" % For document root file + % Unfortunately, there seems to be no corresponding XeTeX command for + % output encoding. This is a problem for auxiliary index and TOC files. + % The only solution would be perhaps to write out @U{...} sequences in + % place of non-ASCII characters. + \fi -callback.register("process_output_buffer", convert_line_out) + \ifx\luatexversion\thisisundefined + \else + \directlua{ + local utf8_char, byte, gsub = unicode.utf8.char, string.byte, string.gsub + local function convert_char (char) + return utf8_char(byte(char)) + end + + local function convert_line (line) + return gsub(line, ".", convert_char) + end + + callback.register("process_input_buffer", convert_line) + + local function convert_line_out (line) + local line_out = "" + for c in string.utfvalues(line) do + line_out = line_out .. string.char(c) + end + return line_out + end + + callback.register("process_output_buffer", convert_line_out) + } + \fi + + \txiusebytewiseiotrue } -\fi % Helpers for encodings. @@ -9496,13 +9521,6 @@ % \def\documentencoding{\parseargusing\filenamecatcodes\documentencodingzzz} \def\documentencodingzzz#1{% - % Get input by bytes instead of by UTF-8 codepoints for XeTeX, - % otherwise the encoding support is completely broken. - % This settings is for the document root file. - \ifx\XeTeXrevision\thisisundefined - \else - \XeTeXinputencoding "bytes" - \fi % % Encoding being declared for the document. \def\declaredencoding{\csname #1.enc\endcsname}% @@ -9519,22 +9537,37 @@ \asciichardefs % \else \ifx \declaredencoding \lattwo + \iftxinativeunicodecapable + \setbytewiseio + \fi \setnonasciicharscatcode\active \lattwochardefs % \else \ifx \declaredencoding \latone + \iftxinativeunicodecapable + \setbytewiseio + \fi \setnonasciicharscatcode\active \latonechardefs % \else \ifx \declaredencoding \latnine + \iftxinativeunicodecapable + \setbytewiseio + \fi \setnonasciicharscatcode\active \latninechardefs % \else \ifx \declaredencoding \utfeight - \setnonasciicharscatcode\active - % since we already invoked \utfeightchardefs at the top level - % (below), do not re-invoke it, then our check for duplicated - % definitions triggers. Making non-ascii chars active is enough. + \iftxinativeunicodecapable + % For native Unicode (XeTeX and LuaTeX) + \nativeunicodechardefs + \else + % For UTF-8 byte sequence (pdfTeX) + \setnonasciicharscatcode\active + % since we already invoked \utfeightchardefs at the top level + % (below), do not re-invoke it, then our check for duplicated + % definitions triggers. Making non-ascii chars active is enough. + \fi % \else \message{Ignoring unknown document encoding: #1.}% @@ -9849,13 +9882,26 @@ % @U{xxxx} to produce U+xxxx, if we support it. \def\U#1{% \expandafter\ifx\csname uni:#1\endcsname \relax - \errhelp = \EMsimple - \errmessage{Unicode character U+#1 not supported, sorry}% + \iftxinativeunicodecapable + % Any Unicode characters can be used by native Unicode. + % However, if the font does not have the glyph, the letter will miss. + \begingroup + \uccode`\.="#1\relax + \uppercase{.} + \endgroup + \else + \errhelp = \EMsimple + \errmessage{Unicode character U+#1 not supported, sorry}% + \fi \else \csname uni:#1\endcsname \fi } +% For UTF-8 byte sequence (pdfTeX) +% Definition macro to replace the Unicode character +% Definition macro that is used by @U command +% \begingroup \catcode`\"=12 \catcode`\<=12 @@ -9864,7 +9910,7 @@ \catcode`\;=12 \catcode`\!=12 \catcode`\~=13 - \gdef\DeclareUnicodeCharacter#1#2{% + \gdef\DeclareUnicodeCharacterUTFviii#1#2{% \countUTFz = "#1\relax %\wlog{\space\space defining Unicode char U+#1 (decimal \the\countUTFz)}% \begingroup @@ -9922,6 +9968,37 @@ \uppercase{\gdef\UTFviiiTmp{#2#3#4}}} \endgroup +% For native Unicode (XeTeX and LuaTeX) +% Definition macro to replace the Unicode character +% +\def\DeclareUnicodeCharacterNative#1#2{% + \catcode"#1=\active + \begingroup + \uccode`\~="#1\relax + \uppercase{\gdef~}{#2}% + \endgroup} + +% For native Unicode (XeTeX and LuaTeX) +% Definition macro not to replace (through) the Unicode character +% +\def\DeclareUnicodeCharacterNativeThru#1#2{% + \catcode"#1=\active + \begingroup + \uccode`\.="#1\relax + \uppercase{\endgroup \def\UTFNativeTmp{.}}% + \begingroup + \uccode`\~="#1\relax + \uppercase{\endgroup \edef~}{\UTFNativeTmp}% +} + +% For native Unicode (XeTeX and LuaTeX) +% Definition macro that is used by @U command +% +\def\DeclareUnicodeCharacterNativeAtU#1#2{% + \def\UTFAtUTmp{#2} + \expandafter\globallet\csname uni:#1\endcsname \UTFAtUTmp +} + % https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_M % U+0000..U+007F = https://en.wikipedia.org/wiki/Basic_Latin_(Unicode_block) % U+0080..U+00FF = https://en.wikipedia.org/wiki/Latin-1_Supplement_(Unicode_block) @@ -9936,7 +10013,7 @@ % We won't be doing that here in this simple file. But we can try to at % least make most of the characters not bomb out. % -\def\utfeightchardefs{% +\def\unicodechardefs{% \DeclareUnicodeCharacter{00A0}{\tie} \DeclareUnicodeCharacter{00A1}{\exclamdown} \DeclareUnicodeCharacter{00A2}{{\tcfont \char162}}% 0242=cent @@ -9953,7 +10030,7 @@ \DeclareUnicodeCharacter{00AD}{\-} \DeclareUnicodeCharacter{00AE}{\registeredsymbol} \DeclareUnicodeCharacter{00AF}{\={ }} - +% \DeclareUnicodeCharacter{00B0}{\ringaccent{ }} \DeclareUnicodeCharacter{00B1}{\ensuremath\pm} \DeclareUnicodeCharacter{00B2}{$^2$} @@ -9970,7 +10047,7 @@ \DeclareUnicodeCharacter{00BD}{$1\over2$} \DeclareUnicodeCharacter{00BE}{$3\over4$} \DeclareUnicodeCharacter{00BF}{\questiondown} - +% \DeclareUnicodeCharacter{00C0}{\`A} \DeclareUnicodeCharacter{00C1}{\'A} \DeclareUnicodeCharacter{00C2}{\^A} @@ -9987,7 +10064,7 @@ \DeclareUnicodeCharacter{00CD}{\'I} \DeclareUnicodeCharacter{00CE}{\^I} \DeclareUnicodeCharacter{00CF}{\"I} - +% \DeclareUnicodeCharacter{00D0}{\DH} \DeclareUnicodeCharacter{00D1}{\~N} \DeclareUnicodeCharacter{00D2}{\`O} @@ -10004,7 +10081,7 @@ \DeclareUnicodeCharacter{00DD}{\'Y} \DeclareUnicodeCharacter{00DE}{\TH} \DeclareUnicodeCharacter{00DF}{\ss} - +% \DeclareUnicodeCharacter{00E0}{\`a} \DeclareUnicodeCharacter{00E1}{\'a} \DeclareUnicodeCharacter{00E2}{\^a} @@ -10021,7 +10098,7 @@ \DeclareUnicodeCharacter{00ED}{\'{\dotless{i}}} \DeclareUnicodeCharacter{00EE}{\^{\dotless{i}}} \DeclareUnicodeCharacter{00EF}{\"{\dotless{i}}} - +% \DeclareUnicodeCharacter{00F0}{\dh} \DeclareUnicodeCharacter{00F1}{\~n} \DeclareUnicodeCharacter{00F2}{\`o} @@ -10038,7 +10115,7 @@ \DeclareUnicodeCharacter{00FD}{\'y} \DeclareUnicodeCharacter{00FE}{\th} \DeclareUnicodeCharacter{00FF}{\"y} - +% \DeclareUnicodeCharacter{0100}{\=A} \DeclareUnicodeCharacter{0101}{\=a} \DeclareUnicodeCharacter{0102}{\u{A}} @@ -10055,7 +10132,7 @@ \DeclareUnicodeCharacter{010D}{\v{c}} \DeclareUnicodeCharacter{010E}{\v{D}} \DeclareUnicodeCharacter{010F}{d'} - +% \DeclareUnicodeCharacter{0110}{\DH} \DeclareUnicodeCharacter{0111}{\dh} \DeclareUnicodeCharacter{0112}{\=E} @@ -10072,7 +10149,7 @@ \DeclareUnicodeCharacter{011D}{\^g} \DeclareUnicodeCharacter{011E}{\u{G}} \DeclareUnicodeCharacter{011F}{\u{g}} - +% \DeclareUnicodeCharacter{0120}{\dotaccent{G}} \DeclareUnicodeCharacter{0121}{\dotaccent{g}} \DeclareUnicodeCharacter{0122}{\cedilla{G}} @@ -10089,7 +10166,7 @@ \DeclareUnicodeCharacter{012D}{\u{\dotless{i}}} \DeclareUnicodeCharacter{012E}{\ogonek{I}} \DeclareUnicodeCharacter{012F}{\ogonek{i}} - +% \DeclareUnicodeCharacter{0130}{\dotaccent{I}} \DeclareUnicodeCharacter{0131}{\dotless{i}} \DeclareUnicodeCharacter{0132}{IJ} @@ -10106,7 +10183,7 @@ \DeclareUnicodeCharacter{013D}{L'}% should kern \DeclareUnicodeCharacter{013E}{l'}% should kern \DeclareUnicodeCharacter{013F}{L\U{00B7}} - +% \DeclareUnicodeCharacter{0140}{l\U{00B7}} \DeclareUnicodeCharacter{0141}{\L} \DeclareUnicodeCharacter{0142}{\l} @@ -10123,7 +10200,7 @@ \DeclareUnicodeCharacter{014D}{\=o} \DeclareUnicodeCharacter{014E}{\u{O}} \DeclareUnicodeCharacter{014F}{\u{o}} - +% \DeclareUnicodeCharacter{0150}{\H{O}} \DeclareUnicodeCharacter{0151}{\H{o}} \DeclareUnicodeCharacter{0152}{\OE} @@ -10140,7 +10217,7 @@ \DeclareUnicodeCharacter{015D}{\^s} \DeclareUnicodeCharacter{015E}{\cedilla{S}} \DeclareUnicodeCharacter{015F}{\cedilla{s}} - +% \DeclareUnicodeCharacter{0160}{\v{S}} \DeclareUnicodeCharacter{0161}{\v{s}} \DeclareUnicodeCharacter{0162}{\cedilla{T}} @@ -10157,7 +10234,7 @@ \DeclareUnicodeCharacter{016D}{\u{u}} \DeclareUnicodeCharacter{016E}{\ringaccent{U}} \DeclareUnicodeCharacter{016F}{\ringaccent{u}} - +% \DeclareUnicodeCharacter{0170}{\H{U}} \DeclareUnicodeCharacter{0171}{\H{u}} \DeclareUnicodeCharacter{0172}{\ogonek{U}} @@ -10174,7 +10251,7 @@ \DeclareUnicodeCharacter{017D}{\v{Z}} \DeclareUnicodeCharacter{017E}{\v{z}} \DeclareUnicodeCharacter{017F}{\missingcharmsg{LONG S}} - +% \DeclareUnicodeCharacter{01C4}{D\v{Z}} \DeclareUnicodeCharacter{01C5}{D\v{z}} \DeclareUnicodeCharacter{01C6}{d\v{z}} @@ -10187,20 +10264,20 @@ \DeclareUnicodeCharacter{01CD}{\v{A}} \DeclareUnicodeCharacter{01CE}{\v{a}} \DeclareUnicodeCharacter{01CF}{\v{I}} - +% \DeclareUnicodeCharacter{01D0}{\v{\dotless{i}}} \DeclareUnicodeCharacter{01D1}{\v{O}} \DeclareUnicodeCharacter{01D2}{\v{o}} \DeclareUnicodeCharacter{01D3}{\v{U}} \DeclareUnicodeCharacter{01D4}{\v{u}} - +% \DeclareUnicodeCharacter{01E2}{\={\AE}} \DeclareUnicodeCharacter{01E3}{\={\ae}} \DeclareUnicodeCharacter{01E6}{\v{G}} \DeclareUnicodeCharacter{01E7}{\v{g}} \DeclareUnicodeCharacter{01E8}{\v{K}} \DeclareUnicodeCharacter{01E9}{\v{k}} - +% \DeclareUnicodeCharacter{01F0}{\v{\dotless{j}}} \DeclareUnicodeCharacter{01F1}{DZ} \DeclareUnicodeCharacter{01F2}{Dz} @@ -10213,23 +10290,23 @@ \DeclareUnicodeCharacter{01FD}{\'{\ae}} \DeclareUnicodeCharacter{01FE}{\'{\O}} \DeclareUnicodeCharacter{01FF}{\'{\o}} - +% \DeclareUnicodeCharacter{021E}{\v{H}} \DeclareUnicodeCharacter{021F}{\v{h}} - +% \DeclareUnicodeCharacter{0226}{\dotaccent{A}} \DeclareUnicodeCharacter{0227}{\dotaccent{a}} \DeclareUnicodeCharacter{0228}{\cedilla{E}} \DeclareUnicodeCharacter{0229}{\cedilla{e}} \DeclareUnicodeCharacter{022E}{\dotaccent{O}} \DeclareUnicodeCharacter{022F}{\dotaccent{o}} - +% \DeclareUnicodeCharacter{0232}{\=Y} \DeclareUnicodeCharacter{0233}{\=y} \DeclareUnicodeCharacter{0237}{\dotless{j}} - +% \DeclareUnicodeCharacter{02DB}{\ogonek{ }} - +% % Greek letters upper case \DeclareUnicodeCharacter{0391}{{\it A}} \DeclareUnicodeCharacter{0392}{{\it B}} @@ -10256,7 +10333,7 @@ \DeclareUnicodeCharacter{03A7}{{\it X}} \DeclareUnicodeCharacter{03A8}{\ensuremath{\mit\Psi}} \DeclareUnicodeCharacter{03A9}{\ensuremath{\mit\Omega}} - +% % Vowels with accents \DeclareUnicodeCharacter{0390}{\ensuremath{\ddot{\acute\iota}}} \DeclareUnicodeCharacter{03AC}{\ensuremath{\acute\alpha}} @@ -10264,10 +10341,10 @@ \DeclareUnicodeCharacter{03AE}{\ensuremath{\acute\eta}} \DeclareUnicodeCharacter{03AF}{\ensuremath{\acute\iota}} \DeclareUnicodeCharacter{03B0}{\ensuremath{\acute{\ddot\upsilon}}} - +% % Standalone accent \DeclareUnicodeCharacter{0384}{\ensuremath{\acute{\ }}} - +% % Greek letters lower case \DeclareUnicodeCharacter{03B1}{\ensuremath\alpha} \DeclareUnicodeCharacter{03B2}{\ensuremath\beta} @@ -10294,19 +10371,19 @@ \DeclareUnicodeCharacter{03C7}{\ensuremath\chi} \DeclareUnicodeCharacter{03C8}{\ensuremath\psi} \DeclareUnicodeCharacter{03C9}{\ensuremath\omega} - +% % More Greek vowels with accents \DeclareUnicodeCharacter{03CA}{\ensuremath{\ddot\iota}} \DeclareUnicodeCharacter{03CB}{\ensuremath{\ddot\upsilon}} \DeclareUnicodeCharacter{03CC}{\ensuremath{\acute o}} \DeclareUnicodeCharacter{03CD}{\ensuremath{\acute\upsilon}} \DeclareUnicodeCharacter{03CE}{\ensuremath{\acute\omega}} - +% % Variant Greek letters \DeclareUnicodeCharacter{03D1}{\ensuremath\vartheta} \DeclareUnicodeCharacter{03D6}{\ensuremath\varpi} \DeclareUnicodeCharacter{03F1}{\ensuremath\varrho} - +% \DeclareUnicodeCharacter{1E02}{\dotaccent{B}} \DeclareUnicodeCharacter{1E03}{\dotaccent{b}} \DeclareUnicodeCharacter{1E04}{\udotaccent{B}} @@ -10319,10 +10396,10 @@ \DeclareUnicodeCharacter{1E0D}{\udotaccent{d}} \DeclareUnicodeCharacter{1E0E}{\ubaraccent{D}} \DeclareUnicodeCharacter{1E0F}{\ubaraccent{d}} - +% \DeclareUnicodeCharacter{1E1E}{\dotaccent{F}} \DeclareUnicodeCharacter{1E1F}{\dotaccent{f}} - +% \DeclareUnicodeCharacter{1E20}{\=G} \DeclareUnicodeCharacter{1E21}{\=g} \DeclareUnicodeCharacter{1E22}{\dotaccent{H}} @@ -10331,7 +10408,7 @@ \DeclareUnicodeCharacter{1E25}{\udotaccent{h}} \DeclareUnicodeCharacter{1E26}{\"H} \DeclareUnicodeCharacter{1E27}{\"h} - +% \DeclareUnicodeCharacter{1E30}{\'K} \DeclareUnicodeCharacter{1E31}{\'k} \DeclareUnicodeCharacter{1E32}{\udotaccent{K}} @@ -10344,7 +10421,7 @@ \DeclareUnicodeCharacter{1E3B}{\ubaraccent{l}} \DeclareUnicodeCharacter{1E3E}{\'M} \DeclareUnicodeCharacter{1E3F}{\'m} - +% \DeclareUnicodeCharacter{1E40}{\dotaccent{M}} \DeclareUnicodeCharacter{1E41}{\dotaccent{m}} \DeclareUnicodeCharacter{1E42}{\udotaccent{M}} @@ -10355,7 +10432,7 @@ \DeclareUnicodeCharacter{1E47}{\udotaccent{n}} \DeclareUnicodeCharacter{1E48}{\ubaraccent{N}} \DeclareUnicodeCharacter{1E49}{\ubaraccent{n}} - +% \DeclareUnicodeCharacter{1E54}{\'P} \DeclareUnicodeCharacter{1E55}{\'p} \DeclareUnicodeCharacter{1E56}{\dotaccent{P}} @@ -10366,7 +10443,7 @@ \DeclareUnicodeCharacter{1E5B}{\udotaccent{r}} \DeclareUnicodeCharacter{1E5E}{\ubaraccent{R}} \DeclareUnicodeCharacter{1E5F}{\ubaraccent{r}} - +% \DeclareUnicodeCharacter{1E60}{\dotaccent{S}} \DeclareUnicodeCharacter{1E61}{\dotaccent{s}} \DeclareUnicodeCharacter{1E62}{\udotaccent{S}} @@ -10377,12 +10454,12 @@ \DeclareUnicodeCharacter{1E6D}{\udotaccent{t}} \DeclareUnicodeCharacter{1E6E}{\ubaraccent{T}} \DeclareUnicodeCharacter{1E6F}{\ubaraccent{t}} - +% \DeclareUnicodeCharacter{1E7C}{\~V} \DeclareUnicodeCharacter{1E7D}{\~v} \DeclareUnicodeCharacter{1E7E}{\udotaccent{V}} \DeclareUnicodeCharacter{1E7F}{\udotaccent{v}} - +% \DeclareUnicodeCharacter{1E80}{\`W} \DeclareUnicodeCharacter{1E81}{\`w} \DeclareUnicodeCharacter{1E82}{\'W} @@ -10399,7 +10476,7 @@ \DeclareUnicodeCharacter{1E8D}{\"x} \DeclareUnicodeCharacter{1E8E}{\dotaccent{Y}} \DeclareUnicodeCharacter{1E8F}{\dotaccent{y}} - +% \DeclareUnicodeCharacter{1E90}{\^Z} \DeclareUnicodeCharacter{1E91}{\^z} \DeclareUnicodeCharacter{1E92}{\udotaccent{Z}} @@ -10410,30 +10487,30 @@ \DeclareUnicodeCharacter{1E97}{\"t} \DeclareUnicodeCharacter{1E98}{\ringaccent{w}} \DeclareUnicodeCharacter{1E99}{\ringaccent{y}} - +% \DeclareUnicodeCharacter{1EA0}{\udotaccent{A}} \DeclareUnicodeCharacter{1EA1}{\udotaccent{a}} - +% \DeclareUnicodeCharacter{1EB8}{\udotaccent{E}} \DeclareUnicodeCharacter{1EB9}{\udotaccent{e}} \DeclareUnicodeCharacter{1EBC}{\~E} \DeclareUnicodeCharacter{1EBD}{\~e} - +% \DeclareUnicodeCharacter{1ECA}{\udotaccent{I}} \DeclareUnicodeCharacter{1ECB}{\udotaccent{i}} \DeclareUnicodeCharacter{1ECC}{\udotaccent{O}} \DeclareUnicodeCharacter{1ECD}{\udotaccent{o}} - +% \DeclareUnicodeCharacter{1EE4}{\udotaccent{U}} \DeclareUnicodeCharacter{1EE5}{\udotaccent{u}} - +% \DeclareUnicodeCharacter{1EF2}{\`Y} \DeclareUnicodeCharacter{1EF3}{\`y} \DeclareUnicodeCharacter{1EF4}{\udotaccent{Y}} - +% \DeclareUnicodeCharacter{1EF8}{\~Y} \DeclareUnicodeCharacter{1EF9}{\~y} - +% % Punctuation \DeclareUnicodeCharacter{2013}{--} \DeclareUnicodeCharacter{2014}{---} @@ -10450,12 +10527,12 @@ \DeclareUnicodeCharacter{2026}{\dots} \DeclareUnicodeCharacter{2039}{\guilsinglleft} \DeclareUnicodeCharacter{203A}{\guilsinglright} - +% \DeclareUnicodeCharacter{20AC}{\euro} - +% \DeclareUnicodeCharacter{2192}{\expansion} \DeclareUnicodeCharacter{21D2}{\result} - +% % Mathematical symbols \DeclareUnicodeCharacter{2200}{\ensuremath\forall} \DeclareUnicodeCharacter{2203}{\ensuremath\exists} @@ -10471,7 +10548,7 @@ \DeclareUnicodeCharacter{2265}{\ensuremath\geq} \DeclareUnicodeCharacter{2282}{\ensuremath\subset} \DeclareUnicodeCharacter{2287}{\ensuremath\supseteq} - +% \DeclareUnicodeCharacter{2016}{\ensuremath\Vert} \DeclareUnicodeCharacter{2032}{\ensuremath\prime} \DeclareUnicodeCharacter{210F}{\ensuremath\hbar} @@ -10571,7 +10648,7 @@ \DeclareUnicodeCharacter{230B}{\ensuremath\rfloor} \DeclareUnicodeCharacter{2322}{\ensuremath\frown} \DeclareUnicodeCharacter{2323}{\ensuremath\smile} - +% \DeclareUnicodeCharacter{25A1}{\ensuremath\Box} \DeclareUnicodeCharacter{25B3}{\ensuremath\triangle} \DeclareUnicodeCharacter{25B7}{\ensuremath\triangleright} @@ -10603,17 +10680,45 @@ \DeclareUnicodeCharacter{2A3F}{\ensuremath\amalg} \DeclareUnicodeCharacter{2AAF}{\ensuremath\preceq} \DeclareUnicodeCharacter{2AB0}{\ensuremath\succeq} - +% \global\mathchardef\checkmark="1370 % actually the square root sign \DeclareUnicodeCharacter{2713}{\ensuremath\checkmark} -}% end of \utfeightchardefs +}% end of \unicodechardefs + +% UTF-8 byte sequence (pdfTeX) definitions (replacing and @U command) +% It makes the setting that replace UTF-8 byte sequence. +\def\utfeightchardefs{% + \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterUTFviii + \unicodechardefs +} + +% Native Unicode (XeTeX and LuaTeX) character replacing definitions +% It makes the setting that replace the Unicode characters. +\def\nativeunicodechardefs{% + \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterNative + \unicodechardefs +} + +% Native Unicode (XeTeX and LuaTeX) character ``through'' definitions +% It makes the setting that does not replace the Unicode characters. +\def\nativeunicodechardefsthru{% + \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterNativeThru + \unicodechardefs +} + +% Native Unicode (XeTeX and LuaTeX) @U command definitions +\def\nativeunicodechardefsatu{% + \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterNativeAtU + \unicodechardefs +} % US-ASCII character definitions. \def\asciichardefs{% nothing need be done \relax } -% Latin1 (ISO-8859-1) character definitions. +% Non-ASCII bytes ``through'' definitions. +% It makes the setting that does not replace the non-ASCII byte. \def\nonasciistringdefs{% \setnonasciicharscatcode\active \def\defstringchar##1{\def##1{\string##1}}% @@ -10659,9 +10764,23 @@ \defstringchar^^fc\defstringchar^^fd\defstringchar^^fe\defstringchar^^ff% } +% Character ``through'' definitions. +% It makes the setting that does not replace the characters. +\def\throughcharactersdefs{% + \iftxiusebytewiseio + \nonasciistringdefs + \else + \nativeunicodechardefsthru + \fi +} + % define all the unicode characters we know about, for the sake of @U. -\utfeightchardefs +\iftxinativeunicodecapable + \nativeunicodechardefsatu +\else + \utfeightchardefs +\fi % Make non-ASCII characters printable again for compatibility with @@ -11010,7 +11129,7 @@ % {@catcode`- = @active @gdef@normalturnoffactive{% - @nonasciistringdefs + @throughcharactersdefs @let-=@normaldash @let"=@normaldoublequote @let$=@normaldollar %$ font-lock fix