* src/preproc/eqn/text.cpp: Remove struct map, entity_table,
  and special_to_entity.  Include "unicode.h" header file.
  (special_char_box::output): Instead of a named entity reference,
  print an XML character reference with the Unicode code point for
  MathML output.

References: https://www.w3.org/TR/REC-xml/#sec-references
---
 src/preproc/eqn/text.cpp | 382 +--------------------------------------
 1 file changed, 3 insertions(+), 379 deletions(-)

diff --git a/src/preproc/eqn/text.cpp b/src/preproc/eqn/text.cpp
index 19b2e8f684ab..e95056ba3e4a 100644
--- a/src/preproc/eqn/text.cpp
+++ b/src/preproc/eqn/text.cpp
@@ -27,383 +27,7 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. */
 #include "eqn.h"
 #include "pbox.h"
 #include "ptable.h"
-
-struct map {
-  const char *from;
-  const char *to;
-};
-
-struct map entity_table[] = {
-  // Classic troff special characters
-  {"%", "&shy;"},      // ISOnum
-  {"'", "&acute;"},    // ISOdia
-  {"!=", "&ne;"},      // ISOtech
-  {"**", "&lowast;"},  // ISOtech
-  {"*a", "&alpha;"},   // ISOgrk3
-  {"*A", "A"},
-  {"*b", "&beta;"},    // ISOgrk3
-  {"*B", "B"},
-  {"*d", "&delta;"},   // ISOgrk3
-  {"*D", "&Delta;"},   // ISOgrk3
-  {"*e", "&epsilon;"}, // ISOgrk3
-  {"*E", "E"},
-  {"*f", "&phi;"},     // ISOgrk3
-  {"*F", "&Phi;"},     // ISOgrk3
-  {"*g", "&gamma;"},   // ISOgrk3
-  {"*G", "&Gamma;"},   // ISOgrk3
-  {"*h", "&theta;"},   // ISOgrk3
-  {"*H", "&Theta;"},   // ISOgrk3
-  {"*i", "&iota;"},    // ISOgrk3
-  {"*I", "I"},
-  {"*k", "&kappa;"},   // ISOgrk3
-  {"*K", "K;"},
-  {"*l", "&lambda;"},  // ISOgrk3
-  {"*L", "&Lambda;"},  // ISOgrk3
-  {"*m", "&mu;"},      // ISOgrk3
-  {"*M", "M"},
-  {"*n", "&nu;"},      // ISOgrk3
-  {"*N", "N"},
-  {"*o", "o"},
-  {"*O", "O"},
-  {"*p", "&pi;"},      // ISOgrk3
-  {"*P", "&Pi;"},      // ISOgrk3
-  {"*q", "&psi;"},     // ISOgrk3
-  {"*Q", "&PSI;"},     // ISOgrk3
-  {"*r", "&rho;"},     // ISOgrk3
-  {"*R", "R"},
-  {"*s", "&sigma;"},   // ISOgrk3
-  {"*S", "&Sigma;"},   // ISOgrk3
-  {"*t", "&tau;"},     // ISOgrk3
-  {"*T", "&Tau;"},     // ISOgrk3
-  {"*u", "&upsilon;"}, // ISOgrk3
-  {"*U", "&Upsilon;"}, // ISOgrk3
-  {"*w", "&omega;"},   // ISOgrk3
-  {"*W", "&Omega;"},   // ISOgrk3
-  {"*x", "&chi;"},     // ISOgrk3
-  {"*X", "&Chi;"},     // ISOgrk3
-  {"*y", "&eta;"},     // ISOgrk3
-  {"*Y", "&Eta;"},     // ISOgrk3
-  {"*z", "&zeta;"},    // ISOgrk3
-  {"*Z", "&Zeta;"},    // ISOgrk3
-  {"+-", "&plusmn;"},  // ISOnum
-  {"->", "&rarr;"},    // ISOnum
-  {"12", "&frac12;"},  // ISOnum
-  {"14", "&frac14;"},  // ISOnum
-  {"34", "&frac34;"},  // ISOnum
-  {"<-", "&larr;"},    // ISOnum
-  {"==", "&equiv;"},   // ISOtech
-  {"Fi", "&ffilig;"},  // ISOpub
-  {"Fl", "&ffllig;"},  // ISOpub
-  {"aa", "&acute;"},   // ISOdia
-  {"ap", "&sim;"},     // ISOtech
-  {"bl", "&phonexb;"}, // ISOpub
-  {"br", "&boxv;"},    // ISObox
-  {"bs", "&phone;"},   // ISOpub (for the Bell logo)
-  {"bu", "&bull;"},    // ISOpub
-  {"bv", "&verbar;"},  // ISOnum
-  {"ca", "&cap;"},     // ISOtech
-  {"ci", "&cir;"},     // ISOpub
-  {"co", "&copy;"},    // ISOnum
-  {"ct", "&cent;"},    // ISOnum
-  {"cu", "&cup;"},     // ISOtech
-  {"da", "&darr;"},    // ISOnum
-  {"de", "&deg;"},     // ISOnum
-  {"dg", "&dagger;"},  // ISOpub
-  {"dd", "&Dagger;"},  // ISOpub
-  {"di", "&divide;"},  // ISOnum
-  {"em", "&mdash;"},   // ISOpub
-  {"eq", "&equals;"},  // ISOnum
-  {"es", "&empty;"},   // ISOamso
-  {"ff", "&fflig;"},   // ISOpub
-  {"fi", "&filig;"},   // ISOpub
-  {"fl", "&fllig;"},   // ISOpub
-  {"fm", "&prime;"},   // ISOtech
-  {"ge", "&ge;"},      // ISOtech
-  {"gr", "&nabla;"},   // ISOtech
-  {"hy", "&hyphen;"},  // ISOnum
-  {"ib", "&sube;"},    // ISOtech
-  {"if", "&infin;"},   // ISOtech
-  {"ip", "&supe;"},    // ISOtech
-  {"is", "&int;"},     // ISOtech
-  {"le", "&le;"},      // ISOtech
-  // Some pile characters go here
-  {"mi", "&minus;"},   // ISOtech
-  {"mo", "&isin;"},    // ISOtech
-  {"mu", "&times;"},   // ISOnum
-  {"no", "&not;"},     // ISOnum
-  {"or", "&verbar;"},  // ISOnum
-  {"pl", "&plus;"},    // ISOnum
-  {"pt", "&prop;"},    // ISOtech
-  {"rg", "&trade;"},   // ISOnum
-  // More pile characters go here
-  {"rn", "&macr;"},    // ISOdia
-  {"ru", "&lowbar;"},  // ISOnum
-  {"sb", "&sub;"},     // ISOtech
-  {"sc", "&sect;"},    // ISOnum
-  {"sl", "/"},
-  {"sp", "&sup;"},     // ISOtech
-  {"sq", "&squf;"},    // ISOpub
-  {"sr", "&radic;"},   // ISOtech
-  {"ts", "&sigmav;"},  // ISOgrk3
-  {"ua", "&uarr;"},    // ISOnum
-  {"ul", "_"},
-  {"~=", "&cong;"},    // ISOtech
-  // Extended specials supported by groff; see groff_char(7).
-  // These are listed in the order they occur on that man page.
-  {"-D", "&ETH;"},     // ISOlat: Icelandic uppercase eth
-  {"Sd", "&eth;"},     // ISOlat1: Icelandic lowercase eth
-  {"TP", "&THORN;"},   // ISOlat1: Icelandic uppercase thorn
-  {"Tp", "&thorn;"},   // ISOlat1: Icelandic lowercase thorn
-  {"ss", "&szlig;"},   // ISOlat1
-  // Ligatures
-  // ff, fi, fl, ffi, ffl from old troff go here
-  {"AE", "&AElig;"},   // ISOlat1
-  {"ae", "&aelig;"},   // ISOlat1
-  {"OE", "&OElig;"},   // ISOlat2
-  {"oe", "&oelig;"},   // ISOlat2
-  {"IJ", "&ijlig;"},   // ISOlat2: Dutch IJ ligature
-  {"ij", "&IJlig;"},   // ISOlat2: Dutch ij ligature
-  {".i", "&inodot;"},  // ISOlat2,ISOamso
-  {".j", "&jnodot;"},  // ISOamso (undocumented but in 1.19)
-  // Accented characters
-  {"'A", "&Aacute;"},  // ISOlat1
-  {"'C", "&Cacute;"},  // ISOlat2
-  {"'E", "&Eacute;"},  // ISOlat1
-  {"'I", "&Iacute;"},  // ISOlat1
-  {"'O", "&Oacute;"},  // ISOlat1
-  {"'U", "&Uacute;"},  // ISOlat1
-  {"'Y", "&Yacute;"},  // ISOlat1
-  {"'a", "&aacute;"},  // ISOlat1
-  {"'c", "&cacute;"},  // ISOlat2
-  {"'e", "&eacute;"},  // ISOlat1
-  {"'i", "&iacute;"},  // ISOlat1
-  {"'o", "&oacute;"},  // ISOlat1
-  {"'u", "&uacute;"},  // ISOlat1
-  {"'y", "&yacute;"},  // ISOlat1
-  {":A", "&Auml;"},    // ISOlat1
-  {":E", "&Euml;"},    // ISOlat1
-  {":I", "&Iuml;"},    // ISOlat1
-  {":O", "&Ouml;"},    // ISOlat1
-  {":U", "&Uuml;"},    // ISOlat1
-  {":Y", "&Yuml;"},    // ISOlat2
-  {":a", "&auml;"},    // ISOlat1
-  {":e", "&euml;"},    // ISOlat1
-  {":i", "&iuml;"},    // ISOlat1
-  {":o", "&ouml;"},    // ISOlat1
-  {":u", "&uuml;"},    // ISOlat1
-  {":y", "&yuml;"},    // ISOlat1
-  {"^A", "&Acirc;"},   // ISOlat1
-  {"^E", "&Ecirc;"},   // ISOlat1
-  {"^I", "&Icirc;"},   // ISOlat1
-  {"^O", "&Ocirc;"},   // ISOlat1
-  {"^U", "&Ucirc;"},   // ISOlat1
-  {"^a", "&acirc;"},   // ISOlat1
-  {"^e", "&ecirc;"},   // ISOlat1
-  {"^i", "&icirc;"},   // ISOlat1
-  {"^o", "&ocirc;"},   // ISOlat1
-  {"^u", "&ucirc;"},   // ISOlat1
-  {"`A", "&Agrave;"},  // ISOlat1
-  {"`E", "&Egrave;"},  // ISOlat1
-  {"`I", "&Igrave;"},  // ISOlat1
-  {"`O", "&Ograve;"},  // ISOlat1
-  {"`U", "&Ugrave;"},  // ISOlat1
-  {"`a", "&agrave;"},  // ISOlat1
-  {"`e", "&egrave;"},  // ISOlat1
-  {"`i", "&igrave;"},  // ISOlat1
-  {"`o", "&ograve;"},  // ISOlat1
-  {"`u", "&ugrave;"},  // ISOlat1
-  {"~A", "&Atilde;"},  // ISOlat1
-  {"~N", "&Ntilde;"},  // ISOlat1
-  {"~O", "&Otilde;"},  // ISOlat1
-  {"~a", "&atilde;"},  // ISOlat1
-  {"~n", "&ntilde;"},  // ISOlat1
-  {"~o", "&otilde;"},  // ISOlat1
-  {"vS", "&Scaron;"},  // ISOlat2
-  {"vs", "&scaron;"},  // ISOlat2
-  {"vZ", "&Zcaron;"},  // ISOlat2
-  {"vz", "&zcaron;"},  // ISOlat2
-  {",C", "&Ccedil;"},  // ISOlat1
-  {",c", "&ccedil;"},  // ISOlat1
-  {"/L", "&Lstrok;"},  // ISOlat2: Polish L with a slash
-  {"/l", "&lstrok;"},  // ISOlat2: Polish l with a slash
-  {"/O", "&Oslash;"},  // ISOlat1
-  {"/o", "&oslash;"},  // ISOlat1
-  {"oA", "&Aring;"},   // ISOlat1
-  {"oa", "&aring;"},   // ISOlat1
-  // Accents
-  {"a\"","&dblac;"},   // ISOdia: double acute accent (Hungarian umlaut)
-  {"a-", "&macr;"},    // ISOdia: macron or bar accent
-  {"a.", "&dot;"},     // ISOdia: dot above
-  {"a^", "&circ;"},    // ISOdia: circumflex accent
-  {"aa", "&acute;"},   // ISOdia: acute accent
-  {"ga", "&grave;"},   // ISOdia: grave accent
-  {"ab", "&breve;"},   // ISOdia: breve accent
-  {"ac", "&cedil;"},   // ISOdia: cedilla accent
-  {"ad", "&uml;"},     // ISOdia: umlaut or dieresis
-  {"ah", "&caron;"},   // ISOdia: caron (aka hacek accent)
-  {"ao", "&ring;"},    // ISOdia: ring or circle accent
-  {"a~", "&tilde;"},   // ISOdia: tilde accent
-  {"ho", "&ogon;"},    // ISOdia: hook or ogonek accent
-  {"ha", "^"},         // ASCII circumflex, hat, caret
-  {"ti", "~"},         // ASCII tilde, large tilde
-  // Quotes
-  {"Bq", "&lsquor;"},  // ISOpub: low double comma quote
-  {"bq", "&ldquor;"},  // ISOpub: low single comma quote
-  {"lq", "&ldquo;"},   // ISOnum
-  {"rq", "&rdquo;"},   // ISOpub
-  {"oq", "&lsquo;"},   // ISOnum: single open quote
-  {"cq", "&rsquo;"},   // ISOnum: single closing quote (ASCII 39)
-  {"aq", "&zerosp;'"}, // apostrophe quote
-  {"dq", "\""},                // double quote (ASCII 34)
-  {"Fo", "&laquo;"},   // ISOnum
-  {"Fc", "&raquo;"},   // ISOnum
-  //{"fo", "&fo;"},
-  //{"fc", "&fc;"},
-  // Punctuation
-  {"r!", "&iexcl;"},   // ISOnum
-  {"r?", "&iquest;"},  // ISOnum
-  // Old troff \(em goes here
-  {"en", "&ndash;"},   // ISOpub: en dash
-  // Old troff \(hy goes here 
-  // Brackets
-  {"lB", "&lsqb;"},    // ISOnum: left (square) bracket
-  {"rB", "&rsqb;"},    // ISOnum: right (square) bracket
-  {"lC", "&lcub;"},    // ISOnum: left (curly) brace
-  {"rC", "&rcub;"},    // ISOnum: right (curly) brace
-  {"la", "&lang;"},    // ISOtech: left angle bracket
-  {"ra", "&rang;"},    // ISOtech: right angle bracket
-  // Old troff \(bv goes here
-  // Bracket-pile characters could go here.
-  // Arrows
-  // Old troff \(<- and \(-> go here
-  {"<>", "&harr;"},    // ISOamsa
-  {"da", "&darr;"},    // ISOnum
-  {"ua", "&uarr;"},    // ISOnum
-  {"lA", "&lArr;"},    // ISOtech
-  {"rA", "&rArr;"},    // ISOtech
-  {"hA", "&iff;"},     // ISOtech: horizontal double-headed arrow
-  {"dA", "&dArr;"},    // ISOamsa
-  {"uA", "&uArr;"},    // ISOamsa
-  {"vA", "&vArr;"},    // ISOamsa: vertical double-headed double arrow
-  //{"an", "&an;"},
-  // Lines
-  {"-h", "&planck;"},  // ISOamso: h-bar (Planck's constant)
-  // Old troff \(or goes here
-  {"ba", "&verbar;"},  // ISOnum
-  // Old troff \(br, \{u, \(ul, \(bv go here
-  {"bb", "&brvbar;"},  // ISOnum
-  {"sl", "/"},
-  {"rs", "&bsol;"},    // ISOnum
-  // Text markers
-  // Old troff \(ci, \(bu, \(dd, \(dg go here
-  {"lz", "&loz;"},     // ISOpub
-  // Old troff sq goes here
-  {"ps", "&para;"},    // ISOnum: paragraph or pilcrow sign
-  {"sc", "&sect;"},    // ISOnum (in old troff)
-  // Old troff \(lh, \{h go here
-  {"at", "&commat;"},  // ISOnum
-  {"sh", "&num;"},     // ISOnum
-  //{"CR", "&CR;"},
-  {"OK", "&check;"},   // ISOpub
-  // Legalize
-  // Old troff \(co, \{g go here
-  {"tm", "&trade;"},   // ISOnum
-  // Currency symbols
-  {"Do", "&dollar;"},  // ISOnum
-  {"ct", "&cent;"},    // ISOnum
-  {"eu", "&euro;"},
-  {"Eu", "&euro;"},
-  {"Ye", "&yen;"},     // ISOnum
-  {"Po", "&pound;"},   // ISOnum
-  {"Cs", "&curren;"},  // ISOnum: currency sign
-  {"Fn", "&fnof"},     // ISOtech
-  // Units
-  // Old troff de goes here
-  {"%0", "&permil;"},  // ISOtech: per thousand, per mille sign
-  // Old troff \(fm goes here
-  {"sd", "&Prime;"},   // ISOtech
-  {"mc", "&micro;"},   // ISOnum
-  {"Of", "&ordf;"},    // ISOnum
-  {"Om", "&ordm;"},    // ISOnum
-  // Logical symbols
-  {"AN", "&and;"},     // ISOtech
-  {"OR", "&or;"},      // ISOtech
-  // Old troff \(no goes here
-  {"te", "&exist;"},   // ISOtech: there exists, existential quantifier
-  {"fa", "&forall;"},  // ISOtech: for all, universal quantifier
-  {"st", "&bepsi"},    // ISOamsr: such that
-  {"3d", "&there4;"},  // ISOtech
-  {"tf", "&there4;"},  // ISOtech
-  // Mathematical symbols
-  // Old troff "12", "14", "34" goes here
-  {"S1", "&sup1;"},    // ISOnum
-  {"S2", "&sup2;"},    // ISOnum
-  {"S3", "&sup3;"},    // ISOnum
-  // Old troff \(pl", \-, \(+- go here
-  {"t+-", "&plusmn;"}, // ISOnum
-  {"-+", "&mnplus;"},  // ISOtech
-  {"pc", "&middot;"},  // ISOnum
-  {"md", "&middot;"},  // ISOnum
-  // Old troff \(mu goes here
-  {"tmu", "&times;"},  // ISOnum
-  {"c*", "&otimes;"},  // ISOamsb: multiply sign in a circle
-  {"c+", "&oplus;"},   // ISOamsb: plus sign in a circle
-  // Old troff \(di goes here
-  {"tdi", "&divide;"}, // ISOnum
-  {"f/", "&horbar;"},  // ISOnum: horizontal bar for fractions
-  // Old troff \(** goes here
-  {"<=", "&le;"},      // ISOtech
-  {">=", "&ge;"},      // ISOtech
-  {"<<", "&Lt;"},      // ISOamsr
-  {">>", "&Gt;"},      // ISOamsr
-  {"!=", "&ne;"},      // ISOtech
-  // Old troff \(eq and \(== go here
-  {"=~", "&cong;"},    // ISOamsr
-  // Old troff \(ap goes here
-  {"~~", "&ap;"},      // ISOtech
-  // This appears to be an error in the groff table.  
-  // It clashes with the Bell Labs use of ~= for a congruence sign
-  // {"~=", "&ap;"},   // ISOamsr
-  // Old troff \(pt, \(es, \(mo go here
-  {"nm", "&notin;"},   // ISOtech
-  {"nb", "&nsub;"},    // ISOamsr
-  {"nc", "&nsup;"},    // ISOamsn
-  {"ne", "&nequiv;"},  // ISOamsn
-  // Old troff \(sb, \(sp, \(ib, \(ip, \(ca, \(cu go here
-  {"/_", "&ang;"},     // ISOamso
-  {"pp", "&perp;"},    // ISOtech
-  // Old troff \(is goes here
-  {"sum", "&sum;"},    // ISOamsb
-  {"product", "&prod;"},       // ISOamsb
-  {"gr", "&nabla;"},   // ISOtech
-  // Old troff \(sr. \{n, \(if go here
-  {"Ah", "&aleph;"},   // ISOtech
-  {"Im", "&image;"},   // ISOamso: Fraktur I, imaginary
-  {"Re", "&real;"},    // ISOamso: Fraktur R, real
-  {"wp", "&weierp;"},  // ISOamso
-  {"pd", "&part;"},    // ISOtech: partial differentiation sign
-  // Their table duplicates the Greek letters here.
-  // We list only the variant forms here, mapping them into
-  // the ISO Greek 4 variants (which may or may not be correct :-() 
-  {"+f", "&b.phiv;"},  // ISOgrk4: variant phi
-  {"+h", "&b.thetas;"},        // ISOgrk4: variant theta
-  {"+p", "&b.omega;"}, // ISOgrk4: variant pi, looking like omega
-  // Card symbols
-  {"CL", "&clubs;"},   // ISOpub: club suit
-  {"SP", "&spades;"},  // ISOpub: spade suit
-  {"HE", "&hearts;"},  // ISOpub: heart suit
-  {"DI", "&diams;"},   // ISOpub: diamond suit
-};
-
-const char *special_to_entity(const char *sp)
-{
-  struct map *mp;
-  for (mp = entity_table; mp < entity_table + countof(entity_table); mp++) {
-    if (strcmp(mp->from, sp) == 0)
-      return mp->to;
-  }
-  return NULL;
-}
+#include "unicode.h"
 
 class char_box : public simple_box {
   unsigned char c;
@@ -665,9 +289,9 @@ void special_char_box::output()
       printf("\\fP");
   }
   else if (output_format == mathml) {
-    const char *entity = special_to_entity(s);
+    const char *entity = glyph_name_to_unicode(s);
     if (entity != NULL)
-      printf("<mo>%s</mo>", entity);
+      printf("<mo>&#x%s;</mo>", entity);
     else
       printf("<merror>unknown eqn/troff special char %s</merror>", s);
   }
-- 
2.52.0


  • ... Nguyễn Gia Phong via discussion of the GNU roff typesetting system and related software
    • ... Nguyễn Gia Phong via discussion of the GNU roff typesetting system and related software
      • ... Nguyễn Gia Phong via discussion of the GNU roff typesetting system and related software

Reply via email to