Will McGugan wrote:
I'd like to replace HTML escape sequences, like &nbsp; and &#39;, with
single characters. Is there a dictionary defined somewhere I can use to
replace these sequences?
How about this?
import re
from htmlentitydefs import name2codepoint
# Matches a single HTML character reference.  Group numbering stays
# compatible with the original pattern:
#   group 1 -> '#' marker of a decimal reference, group 2 -> its digits
#   group 3 -> entity name (e.g. 'nbsp')
#   group 4 -> hex digits of a '&#x...;' reference
# Names are limited to ASCII letters/digits so that a bare '&' followed
# later by an unrelated ';' is not mistaken for an entity.
_entity_re = re.compile(
    r'&(?:(#)(\d+)|([A-Za-z][A-Za-z0-9]*)|#[xX]([0-9A-Fa-f]+));')


def _repl_func(match):
    """Return the replacement text for one ``_entity_re`` match.

    Decimal (``&#39;``), hexadecimal (``&#x27;``) and named (``&nbsp;``)
    references are turned into the single character they denote.  A
    reference that cannot be resolved -- an entity name missing from
    ``name2codepoint`` or a code point ``unichr`` rejects -- is returned
    unchanged instead of raising.
    """
    if match.group(1):  # decimal numeric character reference
        codepoint = int(match.group(2))
    elif match.group(4):  # hexadecimal numeric character reference
        codepoint = int(match.group(4), 16)
    else:  # named entity
        codepoint = name2codepoint.get(match.group(3))
        if codepoint is None:
            # Unknown name: keep the original text rather than fail.
            return match.group(0)
    try:
        return unichr(codepoint)
    except (ValueError, OverflowError):
        # Code point outside the range unichr supports: leave it as is.
        return match.group(0)


def handle_html_entities(string):
    """Replace HTML character references in *string* with characters.

    The text is scanned once, so the result of a substitution is never
    decoded again (``&amp;lt;`` becomes ``&lt;``, not ``<``).  Text that
    merely looks like an entity but cannot be resolved is left as is.

    Replacements are produced with ``unichr``, so *string* should be a
    unicode string.
    """
    return _entity_re.sub(_repl_func, string)
--
http://mail.python.org/mailman/listinfo/python-list