On 2 Wrz, 10:17, [EMAIL PROTECTED] wrote: > On 2 Wrz, 06:05, "Mark Tolonen" <[EMAIL PROTECTED]> wrote: > > > > > "Marco Bizzarri" <[EMAIL PROTECTED]> wrote in message > > >news:[EMAIL PROTECTED] > > > > On Mon, Sep 1, 2008 at 3:25 PM, <[EMAIL PROTECTED]> wrote: > > > >> When I do ${urllib.unquote(c.user.firstName)} without encoding to > > >> latin-1 I got different chars than I will get: no Łukasz but Å ukasz > > >> -- > > >>http://mail.python.org/mailman/listinfo/python-list > > > > That's crazy. "string".encode('latin1') gives you a latin1 encoded > > > string; latin1 is a single byte encoding, therefore taking the first > > > byte should be no problem. > > > > Have you tried: > > > > urllib.unquote(c.user.firstName)[0].encode('latin1') or > > > > urllib.unquote(c.user.firstName)[0].encode('utf8') > > > > I'm assuming here that the urllib.unquote(c.user.firstName) returns an > > > encodable string (which I'm absolutely not sure), but if it does, this > > > should take the first 'character'. > > > The OP stated that the original string was "encoded in UTF-8 and > > urllib.quote()", so after urllib.unquote the string is in UTF-8 format. 
> > This must be decoded into a Unicode string before removing the first > > character: > > > urllib.unquote(c.user.firstName).decode('utf-8')[0] > > > The next problem is that the character in the OP's example string 'Ł' is not > > present in the latin-1 encoding, but using utf-8 encoding demonstrates that > > the full two-byte UTF-8 encoded character is collected: > > > >>> import urllib > > >>> name = urllib.quote(u'Łukasz'.encode('utf-8')) > > >>> name > > '%C5%81ukasz' > > >>> urllib.unquote(name).decode('utf-8')[0].encode('utf-8') > > '\xc5\x81' > > > -Mark > > @Mark, when I tried urllib.unquote(c.user.firstName).decode('utf-8') > [0].encode('utf-8'), I received this message: > > >> return render('/reports/create_report_step2.mako') > > Module pylons.templating:344 in render > << **cache_args) > return pylons.buffet.render(template_name=template, > fragment=fragment, > format=format, namespace=kargs, > **cache_args) > > >> format=format, namespace=kargs, **cache_args) > Module pylons.templating:229 in render > << log.debug("Rendering template %s with engine %s", > full_path, engine_name) > return engine_config['engine'].render(namespace, > template=full_path, > **options)>> **options) > Module mako.ext.turbogears:49 in render > << info.update(self.extra_vars_func()) > > return template.render(**info) > >> return template.render(**info) > Module mako.template:114 in render > << declared by this template's internal rendering method are > also pulled from the given *args, **data > members. 
members.""" > return runtime._render(self, self.callable_, args, data) > > def render_unicode(self, *args, **data):>> return > runtime._render(self, self.callable_, args, data) > Module mako.runtime:287 in _render > << context = Context(buf, **data) > context._with_template = template > _render_context(template, callable_, context, *args, > **_kwargs_for_callable(callable_, data)) > return context.pop_buffer().getvalue()>> > _render_context(template, callable_, context, *args, > **_kwargs_for_callable(callable_, data)) > Module mako.runtime:304 in _render_context > << # if main render method, call from the base of the > inheritance stack > (inherit, lclcontext) = _populate_self_namespace(context, > tmpl) > _exec_template(inherit, lclcontext, args=args, > kwargs=kwargs) > else: > # otherwise, call the actual rendering method specified>> > _exec_template(inherit, lclcontext, args=args, kwargs=kwargs) > Module mako.runtime:337 in _exec_template > << error_template.render_context(context, > error=error) > else: > callable_(context, *args, **kwargs)>> callable_(context, > *args, **kwargs) > Module _reports_create_report_step2_mako:57 in render_body > << > context.write(filters.decode.utf8(urllib.unquote(str(c.period.end)))) > context.write(u' + ') > > context.write(filters.decode.utf8(urllib.unquote(c.user.firstName).decode('utf-8') > [0].encode('utf-8'))) > > context.write(filters.decode.utf8(urllib.unquote(str(c.user.secondName) > [0:1]))) > context.write(u'</h3>\r\n <input type="hidden" > name="works[]" value="')>> > context.write(filters.decode.utf8(urllib.unquote(c.user.firstName).decode('utf-8') > [0].encode('utf-8'))) > Module encodings.utf_8:16 in decode > << > def decode(input, errors='strict'): > return codecs.utf_8_decode(input, errors, True) > > class IncrementalEncoder(codecs.IncrementalEncoder):>> return > codecs.utf_8_decode(input, errors, True) > <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode > characters in position 0-1: ordinal not 
in range(128)
OK, I resolved this problem: ${urllib.unquote(str(c.user.firstName)).decode('utf-8')[0]} Could anyone explain to me why this code works? -- http://mail.python.org/mailman/listinfo/python-list