Sorry for displaying my code so many times, i know i ahve exhaust you but hti is the last thinkg i am gonna ask from you in this thread. We are very close to have this working.
#======================================================== # Collect directory and its filenames as bytes path = b'/home/nikos/public_html/data/apps/' files = os.listdir( path ) for filename in files: # Compute 'path/to/filename' filepath_bytes = path + filename for encoding in ('utf-8', 'iso-8859-7', 'latin-1'): try: filepath = filepath_bytes.decode( encoding ) except UnicodeDecodeError: continue # Rename to something valid in UTF-8 if encoding != 'utf-8': os.rename( filepath_bytes, filepath.encode('utf-8') ) assert os.path.exists( filepath ) break else: # This only runs if we never reached the break raise ValueError( 'unable to clean filename %r' % filepath_bytes ) #======================================================== # Collect filenames of the path dir as strings filenames = os.listdir( '/home/nikos/public_html/data/apps/' ) # Load'em for filename in filenames: try: # Check the presence of a file against the database and insert if it doesn't exist cur.execute('''SELECT url FROM files WHERE url = %s''', (filename,) ) data = cur.fetchone() if not data: # First time for file; primary key is automatic, hit is defaulted print( "iam here", filename + '\n' ) cur.execute('''INSERT INTO files (url, host, lastvisit) VALUES (%s, %s, %s)''', (filename, host, lastvisit) ) except pymysql.ProgrammingError as e: print( repr(e) ) #======================================================== # Collect filenames of the path dir as strings filenames = os.listdir( '/home/nikos/public_html/data/apps/' ) filepaths = () # Build a set of 'path/to/filename' based on the objects of path dir for filename in filenames: filepaths.add( filename ) # Delete spurious cur.execute('''SELECT url FROM files''') data = cur.fetchall() # Check database's filenames against path's filenames for rec in data: if rec not in filepaths: cur.execute('''DELETE FROM files WHERE url = %s''', rec ) ================================================= [Sun Jun 09 00:16:14 2013] [error] [client 79.103.41.173] Original exception was: [Sun Jun 09 00:16:14 2013] [error] [client 79.103.41.173] Traceback (most recent call last): [Sun Jun 09 00:16:14 2013] [error] [client 79.103.41.173] File "/home/nikos/public_html/cgi-bin/files.py", line 78, in <module> [Sun Jun 09 00:16:14 2013] [error] [client 79.103.41.173] assert os.path.exists( filepath ) [Sun Jun 09 00:16:14 2013] [error] [client 79.103.41.173] File "/usr/local/lib/python3.3/genericpath.py", line 18, in exists [Sun Jun 09 00:16:14 2013] [error] [client 79.103.41.173] os.stat(path) [Sun Jun 09 00:16:14 2013] [error] [client 79.103.41.173] UnicodeEncodeError: 'ascii' codec can't encode characters in position 34-37: ordinal not in range(128) ================== Asserts what to make sure the the path/to/file afetr the rename exists but why are we still get those unicodeencodeerrors? -- http://mail.python.org/mailman/listinfo/python-list