Here are the diffs for the unhtmlentities function. Also included is a test script for some rudimentary benchmarking.
-Brad
unhtmlentities_test.php
Description: application/unknown-content-type-microsoft
--- html.c Tue Feb 26 22:44:44 2002
+++ html.c Wed Feb 27 00:42:04 2002
@@ -123,6 +123,39 @@
{ NULL }
};
+
+/* Entity table used by unhtmlentities.
+ *
+ * Maps an entity name (the text between '&' and ';') to the single byte it
+ * decodes to.  The values are the HTML Latin-1 entity code points 160-255
+ * plus the four markup-significant characters (quot, amp, lt, gt).
+ *
+ * TODO: this should be changed to use the same tables as htmlentities.
+ */
+struct entity {
+	const char *str;	/* entity name, without the leading '&' and trailing ';' */
+	unsigned char ch;	/* decoded byte; unsigned because most values are above 127 */
+};
+
+static const struct entity il_EntTable[] =
+{
+	{"quot",34},    {"amp",38},     {"lt",60},      {"gt",62},      {"nbsp",160},
+	{"iexcl",161},  {"cent",162},   {"pound",163},  {"curren",164}, {"yen",165},
+	{"brvbar",166}, {"sect",167},   {"uml",168},    {"copy",169},   {"ordf",170},
+	{"laquo",171},  {"not",172},    {"shy",173},    {"reg",174},    {"macr",175},
+	{"deg",176},    {"plusmn",177}, {"sup2",178},   {"sup3",179},   {"acute",180},
+	{"micro",181},  {"para",182},   {"middot",183}, {"cedil",184},  {"sup1",185},
+	{"ordm",186},   {"raquo",187},  {"frac14",188}, {"frac12",189}, {"frac34",190},
+	{"iquest",191}, {"Agrave",192}, {"Aacute",193}, {"Acirc",194},  {"Atilde",195},
+	{"Auml",196},   {"Aring",197},  {"AElig",198},  {"Ccedil",199}, {"Egrave",200},
+	{"Eacute",201}, {"Ecirc",202},  {"Euml",203},   {"Igrave",204}, {"Iacute",205},
+	{"Icirc",206},  {"Iuml",207},   {"ETH",208},    {"Ntilde",209}, {"Ograve",210},
+	{"Oacute",211}, {"Ocirc",212},  {"Otilde",213}, {"Ouml",214},   {"times",215},
+	{"Oslash",216}, {"Ugrave",217}, {"Uacute",218}, {"Ucirc",219},  {"Uuml",220},
+	{"Yacute",221}, {"THORN",222},  {"szlig",223},  {"agrave",224}, {"aacute",225},
+	{"acirc",226},  {"atilde",227}, {"auml",228},   {"aring",229},  {"aelig",230},
+	{"ccedil",231}, {"egrave",232}, {"eacute",233}, {"ecirc",234},  {"euml",235},
+	{"igrave",236}, {"iacute",237}, {"icirc",238},  {"iuml",239},   {"eth",240},
+	{"ntilde",241}, {"ograve",242}, {"oacute",243}, {"ocirc",244},  {"otilde",245},
+	{"ouml",246},   {"divide",247}, {"oslash",248}, {"ugrave",249}, {"uacute",250},
+	{"ucirc",251},  {"uuml",252},   {"yacute",253}, {"thorn",254},  {"yuml",255}
+};
+
+
+
/* {{{ get_next_char
*/
inline static unsigned short get_next_char(enum entity_charset charset,
@@ -575,6 +608,136 @@
}
}
/* }}} */
+
+
+/* {{{ unhtmlentities_lookup
+   Decodes a single entity body.
+
+   name - first byte after the '&' (not NUL-terminated)
+   len  - number of bytes between the '&' and the ';', exclusive
+   out  - receives the decoded byte on success
+
+   Returns 1 and stores the byte in *out when the body is a known entity name
+   or a numeric reference ('#' followed by one to three decimal digits whose
+   value is at most UCHAR_MAX); returns 0 otherwise and leaves *out untouched. */
+static int unhtmlentities_lookup(const char *name, size_t len, char *out)
+{
+	size_t i;
+
+	if (len == 0) {
+		return 0;
+	}
+
+	if (name[0] == '#') {
+		unsigned int value = 0;
+
+		if (len < 2 || len > 4) {
+			return 0;
+		}
+		/* Every byte after '#' must be a digit: "&#abc;" or "&#1x;" is not a reference. */
+		for (i = 1; i < len; ++i) {
+			if (name[i] < '0' || name[i] > '9') {
+				return 0;
+			}
+			value = value * 10 + (unsigned int) (name[i] - '0');
+		}
+		if (value > (unsigned int) UCHAR_MAX) {
+			return 0;
+		}
+		*out = (char) value;
+		return 1;
+	}
+
+	for (i = 0; i < sizeof(il_EntTable) / sizeof(il_EntTable[0]); ++i) {
+		/* Length check first so a table name that is merely a prefix cannot match. */
+		if (strlen(il_EntTable[i].str) == len
+				&& memcmp(name, il_EntTable[i].str, len) == 0) {
+			*out = (char) il_EntTable[i].ch;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+/* }}} */
+
+
+/* {{{ php_str_unhtmlentities
+   Translates HTML entities in the given string into the appropriate characters.
+
+   str       - input text; it is only read, never modified
+   resultlen - optional in/out length.  On entry a non-zero *resultlen is taken
+               as the length of str; NULL or zero means "use strlen(str)".  On
+               return *resultlen (when non-NULL) holds the length of the result.
+
+   Returns a NUL-terminated buffer obtained from emalloc()/erealloc(); the
+   caller owns it.  Unrecognized or unterminated entities are copied through
+   unchanged, so the result is never longer than the input. */
+PHPAPI char* php_str_unhtmlentities(char *str, unsigned int *resultlen TSRMLS_DC)
+{
+	const size_t max_scan = 15;	/* how far past '&' to look for the closing ';' */
+	const char *p, *sp, *ep, *end;
+	char *buf;
+	char decoded;
+	size_t buflen = 0;
+	size_t slen;
+
+	if (resultlen && *resultlen != 0) {
+		slen = *resultlen;
+	} else {
+		slen = strlen(str);
+	}
+	end = str + slen;
+
+	/* Decoding only ever shrinks the text, so input length + NUL is enough. */
+	buf = emalloc(slen + 1);
+
+	p = str;
+	while (p < end && (sp = memchr(p, '&', (size_t) (end - p))) != NULL) {
+		/* Look for the terminating ';'.  A second '&' inside the window
+		   restarts the candidate entity (and the window) at that '&'. */
+		ep = sp + 1;
+		while (ep < end && *ep != ';' && (size_t) (ep - sp) < max_scan) {
+			if (*ep == '&') {
+				sp = ep;
+			}
+			++ep;
+		}
+		if (ep == end) {
+			break;	/* ran off the end: the tail copy below emits the rest verbatim */
+		}
+
+		/* Literal text in front of the candidate entity. */
+		memcpy(buf + buflen, p, (size_t) (sp - p));
+		buflen += (size_t) (sp - p);
+
+		if (*ep == ';' && unhtmlentities_lookup(sp + 1, (size_t) (ep - sp - 1), &decoded)) {
+			buf[buflen++] = decoded;
+			p = ep + 1;
+		} else {
+			/* Not an entity ("&;", unknown name, or no ';' within the window):
+			   copy it as-is.  When the window ran out, resume scanning AT ep
+			   rather than after it, so a '&' sitting there still gets examined. */
+			const char *next = (*ep == ';') ? ep + 1 : ep;
+
+			memcpy(buf + buflen, sp, (size_t) (next - sp));
+			buflen += (size_t) (next - sp);
+			p = next;
+		}
+	}
+
+	/* Copy any remaining portion of the string. */
+	memcpy(buf + buflen, p, (size_t) (end - p));
+	buflen += (size_t) (end - p);
+
+	/* Shrink the buffer to the size of the result and terminate it. */
+	buf = erealloc(buf, buflen + 1);
+	buf[buflen] = '\0';
+
+	if (resultlen) {
+		*resultlen = (unsigned int) buflen;
+	}
+
+	return buf;
+}
+/* }}} */
+
+
+/* {{{ proto string unhtmlentities(string str)
+   Translates HTML entities in the given string into the appropriate characters. */
+PHP_FUNCTION(unhtmlentities)
+{
+	/*
+	 * This function is basically the reverse of the standard PHP function
+	 * htmlentities, however it DOES NOT currently use the same translation
+	 * table (or the locale).  HTML entities have the form "&data;" where data
+	 * is either the name of an entity (ie. &gt;, &lt;, &quot;) or a # symbol
+	 * followed by a decimal value from 0 to 255 (ie. &#34;, &#38;).
+	 */
+	char *str;
+	int str_len = 0;		/* the "s" specifier stores the length as an int */
+	unsigned int result_len;	/* in: input length, out: decoded length */
+	char *result;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) {
+		return;
+	}
+
+	result_len = (unsigned int) str_len;
+	/* The callee is declared with TSRMLS_DC, so the call must forward TSRMLS_CC. */
+	result = php_str_unhtmlentities(str, &result_len TSRMLS_CC);
+
+	/* Hand the buffer over without duplicating it (third argument 0). */
+	RETURN_STRINGL(result, result_len, 0);
+}
+/* }}} */
+
/*
* Local variables:
--- html.h Tue Feb 26 22:44:44 2002
+++ html.h Wed Feb 27 00:14:54 2002
@@ -29,6 +29,7 @@
PHP_FUNCTION(htmlspecialchars);
PHP_FUNCTION(htmlentities);
+PHP_FUNCTION(unhtmlentities);
PHP_FUNCTION(get_html_translation_table);
PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char * hint_charset);
--- basic_functions.c Tue Feb 26 22:44:44 2002
+++ basic_functions.c Wed Feb 27 00:22:04 2002
@@ -274,6 +274,7 @@
PHP_FE(wordwrap,
NULL)
PHP_FE(htmlspecialchars,
NULL)
PHP_FE(htmlentities,
NULL)
+ PHP_FE(unhtmlentities,
+ NULL)
PHP_FE(get_html_translation_table,
NULL)
PHP_NAMED_FE(md5,php_if_md5,
NULL)
PHP_NAMED_FE(md5_file,php_if_md5_file,
NULL)
-- PHP Development Mailing List <http://www.php.net/> To unsubscribe, visit: http://www.php.net/unsub.php
