http://d.puremagic.com/issues/show_bug.cgi?id=5221
--- Comment #2 from Iain Buclaw <ibuc...@ubuntu.com> 2010-11-16 06:07:53 PST --- (From update of attachment 815) diff -ur src.orig/entity.c src/entity.c --- src.orig/entity.c 2010-03-31 01:26:18.000000000 +0100 +++ src/entity.c 2010-11-16 14:01:58.423055202 +0000 @@ -9,6 +9,7 @@ #include <string.h> +#include <ctype.h> /********************************************* * Convert from named entity to its encoding. @@ -23,7 +24,6 @@ unsigned short value; }; -#if IN_GCC static NameId namesA[]={ "Aacgr", 0x0386, "aacgr", 0x03AC, @@ -42,7 +42,9 @@ "agr", 0x03B1, "Agrave", 0x00C0, "agrave", 0x00E0, + "alefsym", 0x2135, "aleph", 0x2135, + "Alpha", 0x0391, "alpha", 0x03B1, "Amacr", 0x0100, "amacr", 0x0101, @@ -76,9 +78,11 @@ "bcong", 0x224C, "Bcy", 0x0411, "bcy", 0x0431, + "bdquo", 0x201E, "becaus", 0x2235, "bepsi", 0x220D, "bernou", 0x212C, + "Beta", 0x0392, "beta", 0x03B2, "beth", 0x2136, "Bgr", 0x0392, @@ -162,6 +166,7 @@ "CHcy", 0x0427, "chcy", 0x0447, "check", 0x2713, + "Chi", 0x03A7, "chi", 0x03C7, "cir", 0x25CB, "circ", 0x005E, @@ -178,6 +183,7 @@ "coprod", 0x2210, "copy", 0x00A9, "copysr", 0x2117, + "crarr", 0x21B5, "cross", 0x2717, "cuepr", 0x22DE, "cuesc", 0x22DF, @@ -281,17 +287,21 @@ "Eogon", 0x0118, "eogon", 0x0119, "epsi", 0x220A, + "Epsilon", 0x0395, + "epsilon", 0x03B5, "epsis", 0x220A, "epsiv", 0x03B5, "equals", 0x003D, "equiv", 0x2261, "erDot", 0x2253, "esdot", 0x2250, + "Eta", 0x0397, "eta", 0x03B7, "ETH", 0x00D0, "eth", 0x00F0, "Euml", 0x00CB, "euml", 0x00EB, + "euro", 0x20AC, "excl", 0x0021, "exist", 0x2203, NULL, 0 @@ -325,6 +335,7 @@ "frac56", 0x215A, "frac58", 0x215D, "frac78", 0x215E, + "frasl", 0x2044, "frown", 0x2322, NULL, 0 }; @@ -425,6 +436,7 @@ "iocy", 0x0451, "Iogon", 0x012E, "iogon", 0x012F, + "Iota", 0x0399, "iota", 0x03B9, "iquest", 0x00BF, "isin", 0x220A, @@ -450,6 +462,7 @@ }; static NameId namesK[]={ + "Kappa", 0x039A, "kappa", 0x03BA, "kappav", 0x03F0, "Kcedil", 0x0136, @@ -523,7 +536,9 @@ "lozf", 0x2726, "lpar", 0x0028, "lrarr2", 0x21C6, + "lrm", 0x200E, "lrhar2", 0x21CB, + "lsaquo", 0x2039, "lsh", 0x21B0, "lsim", 0x2272, "lsqb", 0x005B, @@ -561,6 +576,7 @@ "mldr", 0x2026, "mnplus", 0x2213, "models", 0x22A7, + "Mu", 0x039C, "mu", 0x03BC, "mumap", 0x22B8, NULL, 0 @@ -573,8 +589,7 @@ "nap", 0x2249, "napos", 0x0149, "natur", 0x266E, -// "nbsp", 0x00A0, - "nbsp", 32, // make non-breaking space appear as space + "nbsp", 0x00A0, "Ncaron", 0x0147, "ncaron", 0x0148, "Ncedil", 0x0145, @@ -631,6 +646,7 @@ "nsupE", 0x2289, "Ntilde", 0x00D1, "ntilde", 0x00F1, + "Nu", 0x039D, "nu", 0x03BD, "num", 0x0023, "numero", 0x2116, @@ -671,10 +687,13 @@ "ohgr", 0x03C9, "ohm", 0x2126, "olarr", 0x21BA, + "oline", 0x203E, "Omacr", 0x014C, "omacr", 0x014D, "Omega", 0x03A9, "omega", 0x03C9, + "Omicron", 0x039F, + "omicron", 0x03BF, "ominus", 0x2296, "oplus", 0x2295, "or", 0x2228, @@ -709,6 +728,7 @@ "PHgr", 0x03A6, "phgr", 0x03C6, "Phi", 0x03A6, + "phi", 0x03C6, "phis", 0x03C6, "phiv", 0x03D5, "phmmat", 0x2133, @@ -780,13 +800,16 @@ "rgr", 0x03C1, "rhard", 0x21C1, "rharu", 0x21C0, + "Rho", 0x03A1, "rho", 0x03C1, "rhov", 0x03F1, "ring", 0x02DA, "rlarr2", 0x21C4, "rlhar2", 0x21CC, + "rlm", 0x200F, "rpar", 0x0029, "rpargt", 0xE291, + "rsaquo", 0x203A, "rsh", 0x21B1, "rsqb", 0x005D, "rsquo", 0x2019, @@ -804,6 +827,7 @@ "Sacute", 0x015A, "sacute", 0x015B, "samalg", 0x2210, + "sbquo", 0x201A, "sbsol", 0xFE68, "sc", 0x227B, "scap", 0x227F, @@ -839,6 +863,7 @@ "shy", 0x00AD, "Sigma", 0x03A3, "sigma", 0x03C3, + "sigmaf", 0x03C2, "sigmav", 0x03C2, "sim", 0x223C, "sime", 0x2243, @@ -886,6 +911,7 @@ static NameId namesT[]={ "target", 0x2316, + "Tau", 0x03A4, "tau", 0x03C4, "Tcaron", 0x0164, "tcaron", 0x0165, @@ -899,7 +925,9 @@ "tgr", 0x03C4, "there4", 0x2234, "Theta", 0x0398, + "theta", 0x03B8, "thetas", 0x03B8, + "thetasym", 0x03D1, "thetav", 0x03D1, "THgr", 0x0398, "thgr", 0x03B8, @@ -961,8 +989,11 @@ "Uogon", 0x0172, "uogon", 0x0173, "uplus", 0x228E, + "Upsi", 0x03A5, "upsi", 0x03C5, - "Upsi", 0x03D2, + "upsih", 0x03D2, + "Upsilon", 0x03A5, + "upsilon", 0x03C5, "urcorn", 0x231D, "urcrop", 0x230E, "Uring", 0x016E, @@ -1052,11 +1083,14 @@ "zcy", 0x0437, "Zdot", 0x017B, "zdot", 0x017C, + "Zeta", 0x0396, "zeta", 0x03B6, "Zgr", 0x0396, "zgr", 0x03B6, "ZHcy", 0x0416, "zhcy", 0x0436, + "zwj", 0x200D, + "zwnj", 0x200C, NULL, 0 }; @@ -1070,297 +1104,17 @@ int HtmlNamedEntity(unsigned char *p, int length) { int tableIndex = tolower(*p) - 'a'; - if (tableIndex >= 0 && tableIndex < 26) { + if (tableIndex >= 0 && tableIndex < 26) + { NameId* names = namesTable[tableIndex]; int i; - for (i = 0; names[i].name; i++){ - if (strncmp(names[i].name, (char *)p, length) == 0){ - return names[i].value; - } + for (i = 0; names[i].name; i++) + { + if (strncmp(names[i].name, (char *)p, length) == 0) + return names[i].value; } } - error("unrecognized character entity \"%.*s\"", length, p); - return -1; -} - -#else //TODO: Merge Walter's list with Thomas' - -static NameId names[] = -{ - // Entities - "quot", 34, - "amp", 38, - "lt", 60, - "gt", 62, - - "OElig", 338, - "oelig", 339, - "Scaron", 352, - "scaron", 353, - "Yuml", 376, - "circ", 710, - "tilde", 732, - "ensp", 8194, - "emsp", 8195, - "thinsp", 8201, - "zwnj", 8204, - "zwj", 8205, - "lrm", 8206, - "rlm", 8207, - "ndash", 8211, - "mdash", 8212, - "lsquo", 8216, - "rsquo", 8217, - "sbquo", 8218, - "ldquo", 8220, - "rdquo", 8221, - "bdquo", 8222, - "dagger", 8224, - "Dagger", 8225, - "permil", 8240, - "lsaquo", 8249, - "rsaquo", 8250, - "euro", 8364, - - // Latin-1 (ISO-8859-1) Entities - "nbsp", 160, - "iexcl", 161, - "cent", 162, - "pound", 163, - "curren", 164, - "yen", 165, - "brvbar", 166, - "sect", 167, - "uml", 168, - "copy", 169, - "ordf", 170, - "laquo", 171, - "not", 172, - "shy", 173, - "reg", 174, - "macr", 175, - "deg", 176, - "plusmn", 177, - "sup2", 178, - "sup3", 179, - "acute", 180, - "micro", 181, - "para", 182, - "middot", 183, - "cedil", 184, - "sup1", 185, - "ordm", 186, - "raquo", 187, - "frac14", 188, - "frac12", 189, - "frac34", 190, - "iquest", 191, - "Agrave", 192, - "Aacute", 193, - "Acirc", 194, - "Atilde", 195, - "Auml", 196, - "Aring", 197, - "AElig", 198, - "Ccedil", 199, - "Egrave", 200, - "Eacute", 201, - "Ecirc", 202, - "Euml", 203, - "Igrave", 204, - "Iacute", 205, - "Icirc", 206, - "Iuml", 207, - "ETH", 208, - "Ntilde", 209, - "Ograve", 210, - "Oacute", 211, - "Ocirc", 212, - "Otilde", 213, - "Ouml", 214, - "times", 215, - "Oslash", 216, - "Ugrave", 217, - "Uacute", 218, - "Ucirc", 219, - "Uuml", 220, - "Yacute", 221, - "THORN", 222, - "szlig", 223, - "agrave", 224, - "aacute", 225, - "acirc", 226, - "atilde", 227, - "auml", 228, - "aring", 229, - "aelig", 230, - "ccedil", 231, - "egrave", 232, - "eacute", 233, - "ecirc", 234, - "euml", 235, - "igrave", 236, - "iacute", 237, - "icirc", 238, - "iuml", 239, - "eth", 240, - "ntilde", 241, - "ograve", 242, - "oacute", 243, - "ocirc", 244, - "otilde", 245, - "ouml", 246, - "divide", 247, - "oslash", 248, - "ugrave", 249, - "uacute", 250, - "ucirc", 251, - "uuml", 252, - "yacute", 253, - "thorn", 254, - "yuml", 255, - - // Symbols and Greek letter entities - "fnof", 402, - "Alpha", 913, - "Beta", 914, - "Gamma", 915, - "Delta", 916, - "Epsilon", 917, - "Zeta", 918, - "Eta", 919, - "Theta", 920, - "Iota", 921, - "Kappa", 922, - "Lambda", 923, - "Mu", 924, - "Nu", 925, - "Xi", 926, - "Omicron", 927, - "Pi", 928, - "Rho", 929, - "Sigma", 931, - "Tau", 932, - "Upsilon", 933, - "Phi", 934, - "Chi", 935, - "Psi", 936, - "Omega", 937, - "alpha", 945, - "beta", 946, - "gamma", 947, - "delta", 948, - "epsilon", 949, - "zeta", 950, - "eta", 951, - "theta", 952, - "iota", 953, - "kappa", 954, - "lambda", 955, - "mu", 956, - "nu", 957, - "xi", 958, - "omicron", 959, - "pi", 960, - "rho", 961, - "sigmaf", 962, - "sigma", 963, - "tau", 964, - "upsilon", 965, - "phi", 966, - "chi", 967, - "psi", 968, - "omega", 969, - "thetasym", 977, - "upsih", 978, - "piv", 982, - "bull", 8226, - "hellip", 8230, - "prime", 8242, - "Prime", 8243, - "oline", 8254, - "frasl", 8260, - "weierp", 8472, - "image", 8465, - "real", 8476, - "trade", 8482, - "alefsym", 8501, - "larr", 8592, - "uarr", 8593, - "rarr", 8594, - "darr", 8595, - "harr", 8596, - "crarr", 8629, - "lArr", 8656, - "uArr", 8657, - "rArr", 8658, - "dArr", 8659, - "hArr", 8660, - "forall", 8704, - "part", 8706, - "exist", 8707, - "empty", 8709, - "nabla", 8711, - "isin", 8712, - "notin", 8713, - "ni", 8715, - "prod", 8719, - "sum", 8721, - "minus", 8722, - "lowast", 8727, - "radic", 8730, - "prop", 8733, - "infin", 8734, - "ang", 8736, - "and", 8743, - "or", 8744, - "cap", 8745, - "cup", 8746, - "int", 8747, - "there4", 8756, - "sim", 8764, - "cong", 8773, - "asymp", 8776, - "ne", 8800, - "equiv", 8801, - "le", 8804, - "ge", 8805, - "sub", 8834, - "sup", 8835, - "nsub", 8836, - "sube", 8838, - "supe", 8839, - "oplus", 8853, - "otimes", 8855, - "perp", 8869, - "sdot", 8901, - "lceil", 8968, - "rceil", 8969, - "lfloor", 8970, - "rfloor", 8971, - "lang", 9001, - "rang", 9002, - "loz", 9674, - "spades", 9824, - "clubs", 9827, - "hearts", 9829, - "diams", 9830, -}; - -int HtmlNamedEntity(unsigned char *p, int length) -{ - int i; - - // BUG: this is a dumb, slow linear search - for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) - { - // Entries are case sensitive - if (memcmp(names[i].name, (char *)p, length) == 0 && - !names[i].name[length]) - return names[i].value; - } return -1; } -#endif -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------