#======================================================== # Collect directory and its filenames as bytes path = b'/home/nikos/public_html/data/apps/' files = os.listdir( path )
for filename in files: # Compute 'path/to/filename' filepath_bytes = path + filename for encoding in ('utf-8', 'iso-8859-7', 'latin-1'): try: filepath = filepath_bytes.decode( encoding ) except UnicodeDecodeError: continue # Rename to something valid in UTF-8 if encoding != 'utf-8': os.rename( filepath_bytes, filepath.encode('utf-8') ) assert os.path.exists( filepath.encode('utf-8') ) break else: # This only runs if we never reached the break raise ValueError( 'unable to clean filename %r' % filepath_bytes ) # Collect filenames of the path dir as strings filenames = os.listdir( '/home/nikos/public_html/data/apps/' ) # Build a set of 'path/to/filename' based on the objects of path dir filepaths = set() for filename in filenames: filepaths.add( filename ) ================== # Load'em for filename in filenames: try: # Check the presence of a file against the database and insert if it doesn't exist cur.execute('''SELECT url FROM files WHERE url = %s''', filename ) data = cur.fetchone() ==================== [Wed Jun 12 10:56:56 2013] [error] [client 79.103.41.173] Traceback (most recent call last):, referer: http://superhost.gr/ [Wed Jun 12 10:56:56 2013] [error] [client 79.103.41.173] File "/home/ nikos/public_html/cgi-bin/files.py", line 102, in <module>, referer: http://superhost.gr/ [Wed Jun 12 10:56:56 2013] [error] [client 79.103.41.173] print ( filename ), referer: http://superhost.gr/ [Wed Jun 12 10:56:56 2013] [error] [client 79.103.41.173] File "/usr/ local/lib/python3.3/codecs.py", line 355, in write, referer: http:// superhost.gr/ [Wed Jun 12 10:56:56 2013] [error] [client 79.103.41.173] data, consumed = self.encode(object, self.errors), referer: http://superhost.gr/ [Wed Jun 12 10:56:56 2013] [error] [client 79.103.41.173] UnicodeEncodeError: 'utf-8' codec can't encode character '\\udcce' in position 0: surrogates not allowed, referer: http://superhost.gr/ ===================== i tried to insert print( filename ) sys.exit(0) just before the execute and the 
output is just Pacman.exe as seen in http://superhost.gr/?page=files.py

It seems the encoding procedure successfully turned all the filenames from greek-iso to utf-8 without failing, so why would there still be encoding issues when it comes to execute?

--
http://mail.python.org/mailman/listinfo/python-list