Chris Stinemetz wrote: > Although I am certain it is not very efficient I was able to > accomplish what I wanted with the following code I wrote: > > import os > import pprint > import csv > from collections import defaultdict > > print_map = {'MOU':0, 'Call_Att':1, 'Device':2} > header = ['IMEI','MOUs','Call_Att','Device'] > > path = 'C:/Users/cs062x/Desktop/Panhandle' > > os.chdir(path) > running_MOU = {} > call_attempts = {} > d = defaultdict(list) > for fname in os.listdir('.'): > with open (fname) as csvfile: > spamreader = csv.reader(csvfile, delimiter=',', quotechar='|') > next(spamreader) > for row in spamreader: > > if row[8]: > device = row[36] > Elapsed_Mins = float(row[7]) > IMEI = row[8].replace("'", "") > > if IMEI in running_MOU.keys():
For big dicts in Python 2, the test `key in some_dict.keys()` is indeed very inefficient, as it builds a list of keys first and then performs a linear scan for the key. Much better: `key in some_dict`. This test avoids building the list and can also use an efficient lookup algorithm that is independent of the size of the dict. > running_MOU[IMEI] += Elapsed_Mins > else: > running_MOU[IMEI] = Elapsed_Mins > > if IMEI in call_attempts.keys(): > call_attempts[IMEI] += 1 > else: > call_attempts[IMEI] = 1 > > # if key matches append mou else append 0. > d[IMEI] = [running_MOU[IMEI]] > d[IMEI].append([call_attempts[IMEI]]) > d[IMEI].append([device]) > > > print ",".join(header) > for k,v in sorted(d.items()): > print k, ",", d[k][print_map['MOU']],",", > d[k][print_map['Call_Att']][0],",", d[k][print_map['Device']][0] > > print "complete" Here's an alternative that uses only one dict: import csv import os import sys header = ['IMEI', 'MOUs', 'Call_Att', 'Device'] path = 'C:/Users/cs062x/Desktop/Panhandle' d = {} for fname in os.listdir(path): with open(os.path.join(path, fname)) as csvfile: spamreader = csv.reader(csvfile, delimiter=',', quotechar='|') next(spamreader) for row in spamreader: if row[8]: device = row[36] elapsed_mins = float(row[7]) IMEI = row[8].replace("'", "") if IMEI in d: record = d[IMEI] record[1] += elapsed_mins record[2] += 1 else: d[IMEI] = [IMEI, elapsed_mins, 1, device] writer = csv.writer(sys.stdout) writer.writerow(header) writer.writerows(sorted(d.itervalues())) print "complete" _______________________________________________ Tutor maillist - Tutor@python.org To unsubscribe or change subscription options: https://mail.python.org/mailman/listinfo/tutor