# Timing transcript from the original post:
#
#   time head -1000000 myfile >/dev/null
#   real 0m4.57s user 0m3.81s sys 0m0.74s
#
#   time ./repnullsalt.py '|' myfile 0 1
#   Null columns:
#   11, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 33, 45, 50, 68
#   real 1m28.94s user 1m28.11s sys 0m0.72s
"""Report which columns of a delimited file contain at least one empty field.

Usage: repnullsalt.py <delimiter> <filename> <limit>

The first line of the file seeds the per-column flags; following rows are
scanned in blocks of one million rows and ``<limit>`` (default 1) is the
number of such blocks to scan.  Column numbers in the report are 1-based.
"""
import sys

# Rows are counted in blocks of this size, both for the progress counter
# and for interpreting <limit>.
_ROWS_PER_BLOCK = 1000000


def _split_fields(line, delimiter):
    """Split one raw line (bytes) into fields, dropping its line terminator.

    Only an actual trailing ``\\n`` or ``\\r\\n`` is removed, so a final line
    without a newline keeps its last byte.
    """
    if line.endswith(b"\r\n"):
        line = line[:-2]
    elif line.endswith(b"\n"):
        line = line[:-1]
    return line.split(delimiter)


def _scan_null_columns(lines, delimiter, limit, out):
    """Return a list of booleans, one per column, True where a field was empty.

    lines      -- iterable of bytes lines; the first one seeds the flags
    delimiter  -- bytes separator passed to ``bytes.split``
    limit      -- int, number of million-row blocks to scan after the first line
    out        -- text stream receiving the progress counter
    """
    lines = iter(lines)
    first = next(lines, None)
    if first is None:
        # Empty input: there are no columns to report.
        return []
    nulls = [field == b"" for field in _split_fields(first, delimiter)]

    for row_number, line in enumerate(lines):
        if row_number % _ROWS_PER_BLOCK == 0:
            block = row_number // _ROWS_PER_BLOCK
            print(block, end=" ", file=out)
            out.flush()
            if block >= limit:
                break
        fields = _split_fields(line, delimiter)
        extra = len(fields) - len(nulls)
        if extra > 0:
            # A row wider than any seen so far: grow the flag list instead
            # of failing with IndexError.
            nulls.extend([False] * extra)
        # The membership test runs in C; the Python-level loop is only paid
        # for rows that really contain an empty field.
        if b"" in fields:
            for index, field in enumerate(fields):
                if not field:
                    nulls[index] = True
    return nulls


def main(argv=None):
    """Command-line entry point.

    argv -- argument list shaped like ``sys.argv`` (program name first);
            defaults to ``sys.argv``.

    Prints the progress counter, then ``Null columns:`` and the comma-separated
    1-based column numbers.  Exits with the usage message when the delimiter or
    file name is missing or when <limit> is not an integer.
    """
    if argv is None:
        argv = sys.argv
    usage = "Usage: " + argv[0] + " <delimiter> <filename> <limit>"
    if not len(argv) > 2:
        sys.exit(usage)
    try:
        # The limit must be an int: comparing an int with the raw argument
        # string raises TypeError in Python 3.
        limit = int(argv[3]) if len(argv) > 3 else 1
    except ValueError:
        sys.exit(usage)
    delimiter = argv[1].encode("latin1")

    with open(argv[2], "rb") as infile:
        nulls = _scan_null_columns(infile, delimiter, limit, sys.stdout)

    print("Null columns:")
    print(", ".join(str(i + 1) for i, is_null in enumerate(nulls) if is_null))


if __name__ == "__main__":
    main()

# -- http://mail.python.org/mailman/listinfo/python-list