On 2 Wrz, 06:05, "Mark Tolonen" <[EMAIL PROTECTED]> wrote: > "Marco Bizzarri" <[EMAIL PROTECTED]> wrote in message > > news:[EMAIL PROTECTED] > > > > > On Mon, Sep 1, 2008 at 3:25 PM, <[EMAIL PROTECTED]> wrote: > > >> When I do ${urllib.unquote(c.user.firstName)} without encoding to > >> latin-1 I got different chars than I will get: no Łukasz but Å ukasz > >> -- > >>http://mail.python.org/mailman/listinfo/python-list > > > That's crazy. "string".encode('latin1') gives you a latin1 encoded > > string; latin1 is a single byte encoding, therefore taking the first > > byte should be no problem. > > > Have you tried: > > > urllib.unquote(c.user.firstName)[0].encode('latin1') or > > > urllib.unquote(c.user.firstName)[0].encode('utf8') > > > I'm assuming here that the urllib.unquote(c.user.firstName) returns an > > encodable string (which I'm absolutely not sure), but if it does, this > > should take the first 'character'. > > The OP stated that the original string was "encoded in UTF-8 and > urllib.quote()", so after urllib.unquote the string is in UTF-8 format. > This must be decoded into a Unicode string before removing the first > character: > > urllib.unquote(c.user.firstName).decode('utf-8')[0] > > The next problem is that the character in the OP's example string 'Ł' is not > present in the latin-1 encoding, but using utf-8 encoding demonstrates that > the full two-byte UTF-8 encoded character is collected: > > >>> import urllib > >>> name = urllib.quote(u'Łukasz'.encode('utf-8')) > >>> name > '%C5%81ukasz' > >>> urllib.unquote(name).decode('utf-8')[0].encode('utf-8') > '\xc5\x81' > > -Mark
@Mark, when I tried urllib.unquote(c.user.firstName).decode('utf-8') [0].encode('utf-8'), I received this message: >> return render('/reports/create_report_step2.mako') Module pylons.templating:344 in render << **cache_args) return pylons.buffet.render(template_name=template, fragment=fragment, format=format, namespace=kargs, **cache_args) >> format=format, namespace=kargs, **cache_args) Module pylons.templating:229 in render << log.debug("Rendering template %s with engine %s", full_path, engine_name) return engine_config['engine'].render(namespace, template=full_path, **options)>> **options) Module mako.ext.turbogears:49 in render << info.update(self.extra_vars_func()) return template.render(**info) >> return template.render(**info) Module mako.template:114 in render << declared by this template's internal rendering method are also pulled from the given *args, **data members. members.""" return runtime._render(self, self.callable_, args, data) def render_unicode(self, *args, **data):>> return runtime._render(self, self.callable_, args, data) Module mako.runtime:287 in _render << context = Context(buf, **data) context._with_template = template _render_context(template, callable_, context, *args, **_kwargs_for_callable(callable_, data)) return context.pop_buffer().getvalue()>> _render_context(template, callable_, context, *args, **_kwargs_for_callable(callable_, data)) Module mako.runtime:304 in _render_context << # if main render method, call from the base of the inheritance stack (inherit, lclcontext) = _populate_self_namespace(context, tmpl) _exec_template(inherit, lclcontext, args=args, kwargs=kwargs) else: # otherwise, call the actual rendering method specified>> _exec_template(inherit, lclcontext, args=args, kwargs=kwargs) Module mako.runtime:337 in _exec_template << error_template.render_context(context, error=error) else: callable_(context, *args, **kwargs)>> callable_(context, *args, **kwargs) Module _reports_create_report_step2_mako:57 in render_body << 
context.write(filters.decode.utf8(urllib.unquote(str(c.period.end)))) context.write(u' + ') context.write(filters.decode.utf8(urllib.unquote(c.user.firstName).decode('utf-8') [0].encode('utf-8'))) context.write(filters.decode.utf8(urllib.unquote(str(c.user.secondName) [0:1]))) context.write(u'</h3>\r\n <input type="hidden" name="works[]" value="')>> context.write(filters.decode.utf8(urllib.unquote(c.user.firstName).decode('utf-8') [0].encode('utf-8'))) Module encodings.utf_8:16 in decode << def decode(input, errors='strict'): return codecs.utf_8_decode(input, errors, True) class IncrementalEncoder(codecs.IncrementalEncoder):>> return codecs.utf_8_decode(input, errors, True) <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128) -- http://mail.python.org/mailman/listinfo/python-list