moriyoshi Tue, 22 Dec 2009 05:50:34 +0000
Revision: http://svn.php.net/viewvc?view=revision&revision=292467
Log:
- Fix bug #46478 (htmlentities() uses obsolete mapping table for character
entity references)
Bug: http://bugs.php.net/46478 (Assigned) htmlentities() uses obsolete mapping
table for character entity references
Changed paths:
U php/php-src/branches/PHP_5_3/NEWS
U php/php-src/branches/PHP_5_3/ext/standard/html.c
A
php/php-src/branches/PHP_5_3/ext/standard/tests/strings/html_entity_decode_html4.phpt
A
php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities_html4.phpt
Modified: php/php-src/branches/PHP_5_3/NEWS
===================================================================
--- php/php-src/branches/PHP_5_3/NEWS 2009-12-22 03:19:47 UTC (rev 292466)
+++ php/php-src/branches/PHP_5_3/NEWS 2009-12-22 05:50:34 UTC (rev 292467)
@@ -134,6 +134,8 @@
- Fixed bug #49174 (crash when extending PDOStatement and trying to set
queryString property). (Felipe)
- Fixed bug #47848 (importNode doesn't preserve attribute namespaces). (Rob)
+- Fixed bug #46478 (htmlentities() uses obsolete mapping table for character
+ entity references). (Moriyoshi)
- Fixed bug #45599 (strip_tags() truncates rest of string with invalid
attribute). (Ilia, hradtke)
- Fixed bug #45120 (PDOStatement->execute() returns true then false for same
Modified: php/php-src/branches/PHP_5_3/ext/standard/html.c
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/html.c 2009-12-22 03:19:47 UTC (rev 292466)
+++ php/php-src/branches/PHP_5_3/ext/standard/html.c 2009-12-22 05:50:34 UTC (rev 292467)
@@ -144,7 +144,7 @@
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
"thetasym", "upsih",
NULL, NULL, NULL,
- "piv"
+ "piv"
};
static entity_table_t ent_uni_punct[] = {
@@ -154,7 +154,7 @@
NULL, NULL, NULL, "ndash", "mdash", NULL, NULL, NULL,
/* 8216 */
"lsquo", "rsquo", "sbquo", NULL, "ldquo", "rdquo", "bdquo", NULL,
- "dagger", "Dagger", "bull", NULL, NULL, NULL, "hellip",
+ "dagger", "Dagger", "bull", NULL, NULL, NULL, "hellip",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "permil", NULL,
/* 8242 */
"prime", "Prime", NULL, NULL, NULL, NULL, NULL, "lsaquo", "rsaquo", NULL,
@@ -194,39 +194,39 @@
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8656 (0x21d0) */
- "lArr", "uArr", "rArr", "dArr", "hArr", "vArr", NULL, NULL,
- NULL, NULL, "lAarr", "rAarr", NULL, "rarrw", NULL, NULL,
+ "lArr", "uArr", "rArr", "dArr", "hArr", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8672 (0x21e0) */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8704 (0x2200) */
- "forall", "comp", "part", "exist", "nexist", "empty", NULL, "nabla",
- "isin", "notin", "epsis", "ni", "notni", "bepsi", NULL, "prod",
+ "forall", NULL, "part", "exist", NULL, "empty", NULL, "nabla",
+ "isin", "notin", NULL, "ni", NULL, NULL, NULL, "prod",
/* 8720 (0x2210) */
- "coprod", "sum", "minus", "mnplus", "plusdo", NULL, "setmn", "lowast",
- "compfn", NULL, "radic", NULL, NULL, "prop", "infin", "ang90",
+ NULL, "sum", "minus", NULL, NULL, NULL, NULL, "lowast",
+ NULL, NULL, "radic", NULL, NULL, "prop", "infin", NULL,
/* 8736 (0x2220) */
- "ang", "angmsd", "angsph", "mid", "nmid", "par", "npar", "and",
- "or", "cap", "cup", "int", NULL, NULL, "conint", NULL,
+ "ang", NULL, NULL, NULL, NULL, NULL, NULL, "and",
+ "or", "cap", "cup", "int", NULL, NULL, NULL, NULL,
/* 8752 (0x2230) */
- NULL, NULL, NULL, NULL, "there4", "becaus", NULL, NULL,
- NULL, NULL, NULL, NULL, "sim", "bsim", NULL, NULL,
+ NULL, NULL, NULL, NULL, "there4", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, "sim", NULL, NULL, NULL,
/* 8768 (0x2240) */
- "wreath", "nsim", NULL, "sime", "nsime", "cong", NULL, "ncong",
- "asymp", "nap", "ape", NULL, "bcong", "asymp", "bump", "bumpe",
+ NULL, NULL, NULL, NULL, NULL, "cong", NULL, NULL,
+ "asymp", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8784 (0x2250) */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8800 (0x2260) */
- "ne", "equiv", NULL, NULL, "le", "ge", "lE", "gE",
- "lnE", "gnE", "Lt", "Gt", "twixt", NULL, "nlt", "ngt",
+ "ne", "equiv", NULL, NULL, "le", "ge", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8816 (0x2270) */
- "nles", "nges", "lsim", "gsim", NULL, NULL, "lg", "gl",
- NULL, NULL, "pr", "sc", "cupre", "sscue", "prsim", "scsim",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8832 (0x2280) */
- "npr", "nsc", "sub", "sup", "nsub", "nsup", "sube", "supe",
+ NULL, NULL, "sub", "sup", "nsub", NULL, "sube", "supe",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8848 (0x2290) */
NULL, NULL, NULL, NULL, NULL, "oplus", NULL, "otimes",
Added: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/html_entity_decode_html4.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/html_entity_decode_html4.phpt (rev 0)
+++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/html_entity_decode_html4.phpt 2009-12-22 05:50:34 UTC (rev 292467)
@@ -0,0 +1,516 @@
+--TEST--
+html_entity_decode() conformance check (HTML 4)
+--FILE--
+<?php
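+$map = array(
+ "&quot;",
+ "&amp;",
+ "&lt;",
+ "&gt;",
+ "&nbsp;",
+ "&iexcl;",
+ "&cent;",
+ "&pound;",
+ "&curren;",
+ "&yen;",
+ "&brvbar;",
+ "&sect;",
+ "&uml;",
+ "&copy;",
+ "&ordf;",
+ "&laquo;",
+ "&not;",
+ "&shy;",
+ "&reg;",
+ "&macr;",
+ "&deg;",
+ "&plusmn;",
+ "&sup2;",
+ "&sup3;",
+ "&acute;",
+ "&micro;",
+ "&para;",
+ "&middot;",
+ "&cedil;",
+ "&sup1;",
+ "&ordm;",
+ "&raquo;",
+ "&frac14;",
+ "&frac12;",
+ "&frac34;",
+ "&iquest;",
+ "&Agrave;",
+ "&Aacute;",
+ "&Acirc;",
+ "&Atilde;",
+ "&Auml;",
+ "&Aring;",
+ "&AElig;",
+ "&Ccedil;",
+ "&Egrave;",
+ "&Eacute;",
+ "&Ecirc;",
+ "&Euml;",
+ "&Igrave;",
+ "&Iacute;",
+ "&Icirc;",
+ "&Iuml;",
+ "&ETH;",
+ "&Ntilde;",
+ "&Ograve;",
+ "&Oacute;",
+ "&Ocirc;",
+ "&Otilde;",
+ "&Ouml;",
+ "&times;",
+ "&Oslash;",
+ "&Ugrave;",
+ "&Uacute;",
+ "&Ucirc;",
+ "&Uuml;",
+ "&Yacute;",
+ "&THORN;",
+ "&szlig;",
+ "&agrave;",
+ "&aacute;",
+ "&acirc;",
+ "&atilde;",
+ "&auml;",
+ "&aring;",
+ "&aelig;",
+ "&ccedil;",
+ "&egrave;",
+ "&eacute;",
+ "&ecirc;",
+ "&euml;",
+ "&igrave;",
+ "&iacute;",
+ "&icirc;",
+ "&iuml;",
+ "&eth;",
+ "&ntilde;",
+ "&ograve;",
+ "&oacute;",
+ "&ocirc;",
+ "&otilde;",
+ "&ouml;",
+ "&divide;",
+ "&oslash;",
+ "&ugrave;",
+ "&uacute;",
+ "&ucirc;",
+ "&uuml;",
+ "&yacute;",
+ "&thorn;",
+ "&yuml;",
+ "&OElig;",
+ "&oelig;",
+ "&Scaron;",
+ "&scaron;",
+ "&Yuml;",
+ "&fnof;",
+ "&circ;",
+ "&tilde;",
+ "&Alpha;",
+ "&Beta;",
+ "&Gamma;",
+ "&Delta;",
+ "&Epsilon;",
+ "&Zeta;",
+ "&Eta;",
+ "&Theta;",
+ "&Iota;",
+ "&Kappa;",
+ "&Lambda;",
+ "&Mu;",
+ "&Nu;",
+ "&Xi;",
+ "&Omicron;",
+ "&Pi;",
+ "&Rho;",
+ "&Sigma;",
+ "&Tau;",
+ "&Upsilon;",
+ "&Phi;",
+ "&Chi;",
+ "&Psi;",
+ "&Omega;",
+ "&alpha;",
+ "&beta;",
+ "&gamma;",
+ "&delta;",
+ "&epsilon;",
+ "&zeta;",
+ "&eta;",
+ "&theta;",
+ "&iota;",
+ "&kappa;",
+ "&lambda;",
+ "&mu;",
+ "&nu;",
+ "&xi;",
+ "&omicron;",
+ "&pi;",
+ "&rho;",
+ "&sigmaf;",
+ "&sigma;",
+ "&tau;",
+ "&upsilon;",
+ "&phi;",
+ "&chi;",
+ "&psi;",
+ "&omega;",
+ "&thetasym;",
+ "&upsih;",
+ "&piv;",
+ "&ensp;",
+ "&emsp;",
+ "&thinsp;",
+ "&zwnj;",
+ "&zwj;",
+ "&lrm;",
+ "&rlm;",
+ "&ndash;",
+ "&mdash;",
+ "&lsquo;",
+ "&rsquo;",
+ "&sbquo;",
+ "&ldquo;",
+ "&rdquo;",
+ "&bdquo;",
+ "&dagger;",
+ "&Dagger;",
+ "&bull;",
+ "&hellip;",
+ "&permil;",
+ "&prime;",
+ "&Prime;",
+ "&lsaquo;",
+ "&rsaquo;",
+ "&oline;",
+ "&frasl;",
+ "&euro;",
+ "&image;",
+ "&weierp;",
+ "&real;",
+ "&trade;",
+ "&alefsym;",
+ "&larr;",
+ "&uarr;",
+ "&rarr;",
+ "&darr;",
+ "&harr;",
+ "&crarr;",
+ "&lArr;",
+ "&uArr;",
+ "&rArr;",
+ "&dArr;",
+ "&hArr;",
+ "&forall;",
+ "&part;",
+ "&exist;",
+ "&empty;",
+ "&nabla;",
+ "&isin;",
+ "&notin;",
+ "&ni;",
+ "&prod;",
+ "&sum;",
+ "&minus;",
+ "&lowast;",
+ "&radic;",
+ "&prop;",
+ "&infin;",
+ "&ang;",
+ "&and;",
+ "&or;",
+ "&cap;",
+ "&cup;",
+ "&int;",
+ "&there4;",
+ "&sim;",
+ "&cong;",
+ "&asymp;",
+ "&ne;",
+ "&equiv;",
+ "&le;",
+ "&ge;",
+ "&sub;",
+ "&sup;",
+ "&nsub;",
+ "&sube;",
+ "&supe;",
+ "&oplus;",
+ "&otimes;",
+ "&perp;",
+ "&sdot;",
+ "&lceil;",
+ "&rceil;",
+ "&lfloor;",
+ "&rfloor;",
+ "&lang;",
+ "&rang;",
+ "&loz;",
+ "&spades;",
+ "&clubs;",
+ "&hearts;",
+ "&diams;",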
+);
+
+foreach ($map as $str) {
+ echo bin2hex(html_entity_decode($str, ENT_QUOTES, "UTF-8")), "\n";
+}
+?>
+--EXPECT--
+22
+26
+3c
+3e
+c2a0
+c2a1
+c2a2
+c2a3
+c2a4
+c2a5
+c2a6
+c2a7
+c2a8
+c2a9
+c2aa
+c2ab
+c2ac
+c2ad
+c2ae
+c2af
+c2b0
+c2b1
+c2b2
+c2b3
+c2b4
+c2b5
+c2b6
+c2b7
+c2b8
+c2b9
+c2ba
+c2bb
+c2bc
+c2bd
+c2be
+c2bf
+c380
+c381
+c382
+c383
+c384
+c385
+c386
+c387
+c388
+c389
+c38a
+c38b
+c38c
+c38d
+c38e
+c38f
+c390
+c391
+c392
+c393
+c394
+c395
+c396
+c397
+c398
+c399
+c39a
+c39b
+c39c
+c39d
+c39e
+c39f
+c3a0
+c3a1
+c3a2
+c3a3
+c3a4
+c3a5
+c3a6
+c3a7
+c3a8
+c3a9
+c3aa
+c3ab
+c3ac
+c3ad
+c3ae
+c3af
+c3b0
+c3b1
+c3b2
+c3b3
+c3b4
+c3b5
+c3b6
+c3b7
+c3b8
+c3b9
+c3ba
+c3bb
+c3bc
+c3bd
+c3be
+c3bf
+c592
+c593
+c5a0
+c5a1
+c5b8
+c692
+cb86
+cb9c
+ce91
+ce92
+ce93
+ce94
+ce95
+ce96
+ce97
+ce98
+ce99
+ce9a
+ce9b
+ce9c
+ce9d
+ce9e
+ce9f
+cea0
+cea1
+cea3
+cea4
+cea5
+cea6
+cea7
+cea8
+cea9
+ceb1
+ceb2
+ceb3
+ceb4
+ceb5
+ceb6
+ceb7
+ceb8
+ceb9
+ceba
+cebb
+cebc
+cebd
+cebe
+cebf
+cf80
+cf81
+cf82
+cf83
+cf84
+cf85
+cf86
+cf87
+cf88
+cf89
+cf91
+cf92
+cf96
+e28082
+e28083
+e28089
+e2808c
+e2808d
+e2808e
+e2808f
+e28093
+e28094
+e28098
+e28099
+e2809a
+e2809c
+e2809d
+e2809e
+e280a0
+e280a1
+e280a2
+e280a6
+e280b0
+e280b2
+e280b3
+e280b9
+e280ba
+e280be
+e28184
+e282ac
+e28491
+e28498
+e2849c
+e284a2
+e284b5
+e28690
+e28691
+e28692
+e28693
+e28694
+e286b5
+e28790
+e28791
+e28792
+e28793
+e28794
+e28880
+e28882
+e28883
+e28885
+e28887
+e28888
+e28889
+e2888b
+e2888f
+e28891
+e28892
+e28897
+e2889a
+e2889d
+e2889e
+e288a0
+e288a7
+e288a8
+e288a9
+e288aa
+e288ab
+e288b4
+e288bc
+e28985
+e28988
+e289a0
+e289a1
+e289a4
+e289a5
+e28a82
+e28a83
+e28a84
+e28a86
+e28a87
+e28a95
+e28a97
+e28aa5
+e28b85
+e28c88
+e28c89
+e28c8a
+e28c8b
+e28ca9
+e28caa
+e2978a
+e299a0
+e299a3
+e299a5
+e299a6
Added: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities_html4.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities_html4.phpt (rev 0)
+++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities_html4.phpt 2009-12-22 05:50:34 UTC (rev 292467)
@@ -0,0 +1,305 @@
+--TEST--
+htmlentities() conformance check (HTML 4)
+--FILE--
+<?php
+function utf32_utf8($k) {
+ if ($k < 0x80) {
+ $retval = pack('C', $k);
+ } else if ($k < 0x800) {
+ $retval = pack('C2',
+ 0xc0 | ($k >> 6),
+ 0x80 | ($k & 0x3f));
+ } else if ($k < 0x10000) {
+ $retval = pack('C3',
+ 0xe0 | ($k >> 12),
+ 0x80 | (($k >> 6) & 0x3f),
+ 0x80 | ($k & 0x3f));
+ } else if ($k < 0x200000) {
+ $retval = pack('C4',
+ 0xf0 | ($k >> 18),
+ 0x80 | (($k >> 12) & 0x3f),
+ 0x80 | (($k >> 6) & 0x3f),
+ 0x80 | ($k & 0x3f));
+ } else if ($k < 0x4000000) {
+ $retval = pack('C5',
+ 0xf8 | ($k >> 24),
+ 0x80 | (($k >> 18) & 0x3f),
+ 0x80 | (($k >> 12) & 0x3f),
+ 0x80 | (($k >> 6) & 0x3f),
+ 0x80 | ($k & 0x3f));
+ } else {
+ $retval = pack('C6',
+ 0xfc | ($k >> 30),
+ 0x80 | (($k >> 24) & 0x3f),
+ 0x80 | (($k >> 18) & 0x3f),
+ 0x80 | (($k >> 12) & 0x3f),
+ 0x80 | (($k >> 6) & 0x3f),
+ 0x80 | ($k & 0x3f));
+ }
+ return $retval;
+}
+
+for ($i = 0; $i < 0x110000; $i++) {
+ if ($i >= 0xd800 && $i < 0xe000)
+ continue;
+ $str = utf32_utf8($i);
+ $result = htmlentities($str, ENT_QUOTES, 'UTF-8');
+ if ($str != $result) {
+ printf("%s\tU+%05X\n", $result, $i);
+ }
+}
+?>
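+--EXPECT--
+&quot; U+00022
+&amp; U+00026
+&#039; U+00027
+&lt; U+0003C
+&gt; U+0003E
+&nbsp; U+000A0
+&iexcl; U+000A1
+&cent; U+000A2
+&pound; U+000A3
+&curren; U+000A4
+&yen; U+000A5
+&brvbar; U+000A6
+&sect; U+000A7
+&uml; U+000A8
+&copy; U+000A9
+&ordf; U+000AA
+&laquo; U+000AB
+&not; U+000AC
+&shy; U+000AD
+&reg; U+000AE
+&macr; U+000AF
+&deg; U+000B0
+&plusmn; U+000B1
+&sup2; U+000B2
+&sup3; U+000B3
+&acute; U+000B4
+&micro; U+000B5
+&para; U+000B6
+&middot; U+000B7
+&cedil; U+000B8
+&sup1; U+000B9
+&ordm; U+000BA
+&raquo; U+000BB
+&frac14; U+000BC
+&frac12; U+000BD
+&frac34; U+000BE
+&iquest; U+000BF
+&Agrave; U+000C0
+&Aacute; U+000C1
+&Acirc; U+000C2
+&Atilde; U+000C3
+&Auml; U+000C4
+&Aring; U+000C5
+&AElig; U+000C6
+&Ccedil; U+000C7
+&Egrave; U+000C8
+&Eacute; U+000C9
+&Ecirc; U+000CA
+&Euml; U+000CB
+&Igrave; U+000CC
+&Iacute; U+000CD
+&Icirc; U+000CE
+&Iuml; U+000CF
+&ETH; U+000D0
+&Ntilde; U+000D1
+&Ograve; U+000D2
+&Oacute; U+000D3
+&Ocirc; U+000D4
+&Otilde; U+000D5
+&Ouml; U+000D6
+&times; U+000D7
+&Oslash; U+000D8
+&Ugrave; U+000D9
+&Uacute; U+000DA
+&Ucirc; U+000DB
+&Uuml; U+000DC
+&Yacute; U+000DD
+&THORN; U+000DE
+&szlig; U+000DF
+&agrave; U+000E0
+&aacute; U+000E1
+&acirc; U+000E2
+&atilde; U+000E3
+&auml; U+000E4
+&aring; U+000E5
+&aelig; U+000E6
+&ccedil; U+000E7
+&egrave; U+000E8
+&eacute; U+000E9
+&ecirc; U+000EA
+&euml; U+000EB
+&igrave; U+000EC
+&iacute; U+000ED
+&icirc; U+000EE
+&iuml; U+000EF
+&eth; U+000F0
+&ntilde; U+000F1
+&ograve; U+000F2
+&oacute; U+000F3
+&ocirc; U+000F4
+&otilde; U+000F5
+&ouml; U+000F6
+&divide; U+000F7
+&oslash; U+000F8
+&ugrave; U+000F9
+&uacute; U+000FA
+&ucirc; U+000FB
+&uuml; U+000FC
+&yacute; U+000FD
+&thorn; U+000FE
+&yuml; U+000FF
+&OElig; U+00152
+&oelig; U+00153
+&Scaron; U+00160
+&scaron; U+00161
+&Yuml; U+00178
+&fnof; U+00192
+&circ; U+002C6
+&tilde; U+002DC
+&Alpha; U+00391
+&Beta; U+00392
+&Gamma; U+00393
+&Delta; U+00394
+&Epsilon; U+00395
+&Zeta; U+00396
+&Eta; U+00397
+&Theta; U+00398
+&Iota; U+00399
+&Kappa; U+0039A
+&Lambda; U+0039B
+&Mu; U+0039C
+&Nu; U+0039D
+&Xi; U+0039E
+&Omicron; U+0039F
+&Pi; U+003A0
+&Rho; U+003A1
+&Sigma; U+003A3
+&Tau; U+003A4
+&Upsilon; U+003A5
+&Phi; U+003A6
+&Chi; U+003A7
+&Psi; U+003A8
+&Omega; U+003A9
+&alpha; U+003B1
+&beta; U+003B2
+&gamma; U+003B3
+&delta; U+003B4
+&epsilon; U+003B5
+&zeta; U+003B6
+&eta; U+003B7
+&theta; U+003B8
+&iota; U+003B9
+&kappa; U+003BA
+&lambda; U+003BB
+&mu; U+003BC
+&nu; U+003BD
+&xi; U+003BE
+&omicron; U+003BF
+&pi; U+003C0
+&rho; U+003C1
+&sigmaf; U+003C2
+&sigma; U+003C3
+&tau; U+003C4
+&upsilon; U+003C5
+&phi; U+003C6
+&chi; U+003C7
+&psi; U+003C8
+&omega; U+003C9
+&thetasym; U+003D1
+&upsih; U+003D2
+&piv; U+003D6
+&ensp; U+02002
+&emsp; U+02003
+&thinsp; U+02009
+&zwnj; U+0200C
+&zwj; U+0200D
+&lrm; U+0200E
+&rlm; U+0200F
+&ndash; U+02013
+&mdash; U+02014
+&lsquo; U+02018
+&rsquo; U+02019
+&sbquo; U+0201A
+&ldquo; U+0201C
+&rdquo; U+0201D
+&bdquo; U+0201E
+&dagger; U+02020
+&Dagger; U+02021
+&bull; U+02022
+&hellip; U+02026
+&permil; U+02030
+&prime; U+02032
+&Prime; U+02033
+&lsaquo; U+02039
+&rsaquo; U+0203A
+&oline; U+0203E
+&frasl; U+02044
+&euro; U+020AC
+&image; U+02111
+&weierp; U+02118
+&real; U+0211C
+&trade; U+02122
+&alefsym; U+02135
+&larr; U+02190
+&uarr; U+02191
+&rarr; U+02192
+&darr; U+02193
+&harr; U+02194
+&crarr; U+021B5
+&lArr; U+021D0
+&uArr; U+021D1
+&rArr; U+021D2
+&dArr; U+021D3
+&hArr; U+021D4
+&forall; U+02200
+&part; U+02202
+&exist; U+02203
+&empty; U+02205
+&nabla; U+02207
+&isin; U+02208
+&notin; U+02209
+&ni; U+0220B
+&prod; U+0220F
+&sum; U+02211
+&minus; U+02212
+&lowast; U+02217
+&radic; U+0221A
+&prop; U+0221D
+&infin; U+0221E
+&ang; U+02220
+&and; U+02227
+&or; U+02228
+&cap; U+02229
+&cup; U+0222A
+&int; U+0222B
+&there4; U+02234
+&sim; U+0223C
+&cong; U+02245
+&asymp; U+02248
+&ne; U+02260
+&equiv; U+02261
+&le; U+02264
+&ge; U+02265
+&sub; U+02282
+&sup; U+02283
+&nsub; U+02284
+&sube; U+02286
+&supe; U+02287
+&oplus; U+02295
+&otimes; U+02297
+&perp; U+022A5
+&sdot; U+022C5
+&lceil; U+02308
+&rceil; U+02309
+&lfloor; U+0230A
+&rfloor; U+0230B
+&lang; U+02329
+&rang; U+0232A
+&loz; U+025CA
+&spades; U+02660
+&clubs; U+02663
+&hearts; U+02665
+&diams; U+02666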
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php