Oscar Del Ben wrote:
So I'm trying to send a file through webpy and urllib2 but I can't get
around these UnicodeErrors. Here's the code:

# controller

# The {} default is presumably what makes web.py hand the upload back as an
# object (it is read through `.file` below) instead of a plain string.
x = web.input(video_original={})
params = dict(foo=x['foo'])

# Read the whole uploaded file into memory as one string.
video_bytes = x['video_original'].file.read()

# One (form field name, filename, contents) triple per file to send.
files = (
    ('video[original]', 'test', video_bytes),
)
client.upload(upload_url, params, files, access_token())

# client library

def __encodeMultipart(self, fields, files):
        """
        Build the body of a multipart/form-data request.

        fields is a mapping of name -> value for regular form fields
        (it is walked with iteritems()).
        files is a sequence of (name, filename, value) elements for
        data to be uploaded as files.
        Return (boundary, body); the caller puts the boundary into the
        Content-Type header it sends along with the body.
        """
        boundary = mimetools.choose_boundary()
        crlf = '\r\n'

        l = []
        # Regular fields: part delimiter, disposition header, blank line
        # ending the part headers, then the value.
        for k, v in fields.iteritems():
            l.append('--' + boundary)
            l.append('Content-Disposition: form-data; name="%s"' % k)
            l.append('')
            l.append(v)
        # File parts also carry a filename and a Content-Type guessed
        # from that filename.
        for (k, f, v) in files:
            l.append('--' + boundary)
            l.append('Content-Disposition: form-data; name="%s"; filename="%s"'
                     % (k, f))
            l.append('Content-Type: %s' % self.__getContentType(f))
            l.append('')
            l.append(v)
        # Closing delimiter, followed by a final CRLF.
        l.append('--' + boundary + '--')
        l.append('')
        # Every element must be a byte string at this point: if even one
        # is unicode, join() decodes the others as ASCII, and raw file
        # bytes then raise UnicodeDecodeError (see the traceback below).
        body = crlf.join(l)

        return boundary, body

    def __getContentType(self, filename):
        """
        Return the MIME type guessed from filename, falling back to
        'application/octet-stream' when no type can be guessed.
        """
        guessed = mimetypes.guess_type(filename)[0]
        return guessed or 'application/octet-stream'

    def upload(self, path, post_params, files, token=None):
      """
      POST post_params and files to path as multipart/form-data.

      path is appended to self.authority to form the request URL.
      post_params is the mapping of regular form fields; it is also
      handed to the OAuth request as its parameters.
      files is a sequence of (name, filename, value) elements.
      token, when given, is a serialized OAuth token string.
      Return whatever urllib2.urlopen() returns for the request.
      """

      if token:
        token = oauth.OAuthToken.from_string(token)

      url = "http://%s%s" % (self.authority, path)

      (boundary, body) = self.__encodeMultipart(post_params, files)

      headers = {
          'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
          'Content-Length': str(len(body))
          }

      request = oauth.OAuthRequest.from_consumer_and_token(
        self.consumer,
        token,
        http_method='POST',
        http_url=url,
        parameters=post_params
      )

      request.sign_request(oauth.OAuthSignatureMethod_HMAC_SHA1(),
                           self.consumer, token)
      # NOTE(review): only request.http_url is used from the signed
      # request; the signature itself is not copied into `headers` or
      # the body below -- confirm how the server is meant to receive it.

      # urllib2.Request takes the payload as `data`; it has no
      # `postdata` keyword.
      request = urllib2.Request(request.http_url, data=body,
                                headers=headers)
      request.get_method = lambda: 'POST'

      return urllib2.urlopen(request)

Unfortunately I get two kinds of unicode error, the first one in the
crlf.join(l):

Traceback (most recent call last):
  File "/Users/oscar/projects/work/whitelabel/web/application.py",
line 242, in process
    return self.handle()
  File "/Users/oscar/projects/work/whitelabel/web/application.py",
line 233, in handle
    return self._delegate(fn, self.fvars, args)
  File "/Users/oscar/projects/work/whitelabel/web/application.py",
line 412, in _delegate
    return handle_class(cls)
  File "/Users/oscar/projects/work/whitelabel/web/application.py",
line 387, in handle_class
    return tocall(*args)
  File "/Users/oscar/projects/work/whitelabel/code.py", line 328, in
POST
    return simplejson.load(client.upload(upload_url, params, files,
access_token()))
  File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line
131, in upload
    (boundary, body) = self.__encodeMultipart(post_params, files)
  File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line
111, in __encodeMultipart
    body = crlf.join(l)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xb7 in position
42: ordinal not in range(128)


And here's another one:

Traceback (most recent call last):
  File "/Users/oscar/projects/work/whitelabel/web/application.py",
line 242, in process
    return self.handle()
  File "/Users/oscar/projects/work/whitelabel/web/application.py",
line 233, in handle
    return self._delegate(fn, self.fvars, args)
  File "/Users/oscar/projects/work/whitelabel/web/application.py",
line 412, in _delegate
    return handle_class(cls)
  File "/Users/oscar/projects/work/whitelabel/web/application.py",
line 387, in handle_class
    return tocall(*args)
  File "/Users/oscar/projects/work/whitelabel/code.py", line 328, in
POST
    return simplejson.load(client.upload(upload_url, params, files,
access_token()))
  File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line
131, in upload
    (boundary, body) = self.__encodeMultipart(post_params, files)
  File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line
111, in __encodeMultipart
    body = crlf.join(l)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xb7 in position
42: ordinal not in range(128)

Does anyone know why these errors happen and what I should do to
prevent them? Many thanks.

Oscar

I did a short test to demonstrate the likely problem, without all the other libraries and complexity.

lst = ["abc"]
lst.append("def")
lst.append(u"abc")
lst.append("g\x48\x82\x94i")
print lst
print "**".join(lst)


That fragment of code generates (in Python 2.6) the following output and traceback:

['abc', 'def', u'abc', 'gH\x82\x94i']
Traceback (most recent call last):
 File "M:\Programming\Python\sources\dummy\stuff2.py", line 10, in <module>
   print "**".join(lst)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x82 in position 2: ordinal not in range(128)


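You'll notice that one of the strings is a unicode one, and another one has the character 0x82 in it. Once join() discovers a Unicode element, it needs to produce a Unicode result, so it decodes every byte string in the list, and by default it uses the ASCII codec to do that. ASCII only covers 0x00-0x7f, so the first byte above 0x7f raises UnicodeDecodeError.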

If you print your 'l' list (bad name, by the way, looks too much like a '1'), you can see which element is Unicode, and which one has the \xb7 in position 42. You'll have to decide which is the problem, and solve it accordingly. Was the fact that one of the strings is unicode an oversight? Or did you think that all characters would be 0x7f or less? Or do you want to handle all possible characters, and if so, with what encoding?

DaveA


--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to