Take a look at numpy. Also, you don't necessarily need to give us the whole code; it becomes too long without purpose.
Abdur-Rahmaan Janhangeer, Mauritius abdurrahmaanjanhangeer.wordpress.com On 6 Jun 2017 03:26, "syed zaidi" <syedzaid...@hotmail.co.uk> wrote: Hi, I would appreciate it if you could help me by suggesting a quick and efficient strategy for comparing multiple lists with one principal list. I have about 125 lists containing about 100,000 numerical entries each; my principal list contains about 6 million entries. I want to compare each small list with the main list and append yes/no or 0/1 to a new list corresponding to each of the 125 lists. The program is working, but it takes ages to process huge files. Can someone please tell me how I can make this process faster? Right now it takes around 2 weeks to complete this task. The code I have written, which is working, is as follows: sample_name = [] main_op_list,principal_list = [],[] dictionary = {} with open("C:/Users/INVINCIBLE/Desktop/T2D_ALL_blastout_batch.txt", 'r') as f: reader = csv.reader(f, dialect = 'excel', delimiter='\t') list2 = filter(None, reader) for i in range(len(list2)): col1 = list2[i][0] operon = list2[i][1] main_op_list.append(operon) col1 = col1.strip().split("_") sample_name = col1[0] if dictionary.get(sample_name): dictionary[sample_name].append(operon) else: dictionary[sample_name] = [] dictionary[sample_name].append(operon) locals().update(dictionary) ## converts dictionary keys to variables ##print DLF004 dict_values = dictionary.values() dict_keys = dictionary.keys() print dict_keys print len(dict_keys) main_op_list_np = np.array(main_op_list) DLF002_1,DLF004_1,DLF005_1,DLF006_1,DLF007_1,DLF008_1, DLF009_1,DLF010_1,DLF012_1,DLF013_1,DLF014_1,DLM001_1, DLM002_1,DLM003_1,DLM004_1,DLM005_1,DLM006_1,DLM009_1, DLM011_1,DLM012_1,DLM018_1,DOF002_1,DOF003_1 =[],[],[],[],[],[],[],[],[],[] ,[],[],[],[],[],[],[],[],[],[],[],[],[] DOF004_1,DOF006_1,DOF007_1,DOF008_1,DOF009_1,DOF010_1, DOF011_1,DOF012_1,DOF013_1,DOF014_1,DOM001_1,DOM003_1, DOM005_1,DOM008_1,DOM010_1,DOM012_1,DOM013_1,DOM014_1, 
DOM015_1,DOM016_1,DOM017_1,DOM018_1,DOM019_1 =[],[],[],[],[],[],[],[],[],[] ,[],[],[],[],[],[],[],[],[],[],[],[],[] DOM020_1,DOM021_1,DOM022_1,DOM023_1,DOM024_1,DOM025_1,DOM026_1 = [],[],[],[],[],[],[] NLF001_1,NLF002_1,NLF005_1,NLF006_1,NLF007_1,NLF008_1, NLF009_1,NLF010_1,NLF011_1,NLF012_1,NLF013_1,NLF014_1, NLF015_1,NLM001_1,NLM002_1,NLM003_1,NLM004_1,NLM005_1, NLM006_1,NLM007_1,NLM008_1,NLM009_1,NLM010_1 =[],[],[],[],[],[],[],[],[],[] ,[],[],[],[],[],[],[],[],[],[],[],[],[] NLM015_1,NLM016_1,NLM017_1,NLM021_1,NLM022_1,NLM023_1, NLM024_1,NLM025_1,NLM026_1,NLM027_1,NLM028_1,NLM029_1, NLM031_1,NLM032_1,NOF001_1,NOF002_1,NOF004_1,NOF005_1, NOF006_1,NOF007_1,NOF008_1,NOF009_1,NOF010_1 =[],[],[],[],[],[],[],[],[],[] ,[],[],[],[],[],[],[],[],[],[],[],[],[] NOF011_1,NOF012_1,NOF013_1,NOF014_1,NOM001_1,NOM002_1, NOM004_1,NOM005_1,NOM007_1,NOM008_1,NOM009_1,NOM010_1, NOM012_1,NOM013_1,NOM015_1,NOM016_1,NOM017_1,NOM018_1, NOM019_1,NOM020_1,NOM022_1,NOM023_1,NOM025_1 =[],[],[],[],[],[],[],[],[],[] ,[],[],[],[],[],[],[],[],[],[],[],[],[] NOM026_1,NOM027_1,NOM028_1,NOM029_1 = [],[],[],[] for i in main_op_list_np: if i in DLF002: DLF002_1.append('1') else:DLF002_1.append('0') if i in DLF004: DLF004_1.append('1') else:DLF004_1.append('0') if i in DLF005: DLF005_1.append('1') else:DLF005_1.append('0') if i in DLF006: DLF006_1.append('1') else:DLF006_1.append('0') if i in DLF007: DLF007_1.append('1') else:DLF007_1.append('0') if i in DLF008: DLF008_1.append('1') else:DLF008_1.append('0') ## if main_op_list[i] in DLF009: DLF009_1.append('1') ## else:DLF009_1.append('0') if i in DLF010: DLF010_1.append('1') else:DLF010_1.append('0') if i in DLF012: DLF012_1.append('1') else:DLF012_1.append('0') if i in DLF013: DLF013_1.append('1') else:DLF013_1.append('0') if i in DLF014: DLF014_1.append('1') else:DLF014_1.append('0') if i in DLM001: DLM001_1.append('1') else:DLM001_1.append('0') if i in DLM002: DLM002_1.append('1') else:DLM002_1.append('0') if i in DLM003: DLM003_1.append('1') 
else:DLM003_1.append('0') if i in DLM004: DLM004_1.append('1') else:DLM004_1.append('0') if i in DLM005: DLM005_1.append('1') else:DLM005_1.append('0') if i in DLM006: DLM006_1.append('1') else:DLM006_1.append('0') if i in DLM009: DLM009_1.append('1') else:DLM009_1.append('0') if i in DLM011: DLM011_1.append('1') else:DLM011_1.append('0') if i in DLM012: DLM012_1.append('1') else:DLM012_1.append('0') if i in DLM018: DLM018_1.append('1') else:DLM018_1.append('0') if i in DOF002: DOF002_1.append('1') else:DOF002_1.append('0') if i in DOF003: DOF003_1.append('1') else:DOF003_1.append('0') if i in DOF004: DOF004_1.append('1') else:DOF004_1.append('0') if i in DOF006: DOF006_1.append('1') else:DOF006_1.append('0') if i in DOF007: DOF007_1.append('1') else:DOF007_1.append('0') if i in DOF008: DOF008_1.append('1') else:DOF008_1.append('0') if i in DOF009: DOF009_1.append('1') else:DOF009_1.append('0') if i in DOF010: DOF010_1.append('1') else:DOF010_1.append('0') if i in DOF011: DOF011_1.append('1') else:DOF011_1.append('0') if i in DOF012: DOF012_1.append('1') else:DOF012_1.append('0') if i in DOF013: DOF013_1.append('1') else:DOF013_1.append('0') if i in DOF014: DOF014_1.append('1') else:DOF014_1.append('0') if i in DOM001: DOM001_1.append('1') else:DOM001_1.append('0') if i in DOM003: DOM003_1.append('1') else:DOM003_1.append('0') if i in DOM005: DOM005_1.append('1') else:DOM005_1.append('0') if i in DOM008: DOM008_1.append('1') else:DOM008_1.append('0') if i in DOM010: DOM010_1.append('1') else:DOM010_1.append('0') if i in DOM012: DOM012_1.append('1') else:DOM012_1.append('0') if i in DOM013: DOM013_1.append('1') else:DOM013_1.append('0') if i in DOM014: DOM014_1.append('1') else:DOM014_1.append('0') if i in DOM015: DOM015_1.append('1') else:DOM015_1.append('0') if i in DOM016: DOM016_1.append('1') else:DOM016_1.append('0') if i in DOM017: DOM017_1.append('1') else:DOM017_1.append('0') if i in DOM018: DOM018_1.append('1') else:DOM018_1.append('0') if i in DOM019: 
DOM019_1.append('1') else:DOM019_1.append('0') if i in DOM020: DOM020_1.append('1') else:DOM020_1.append('0') if i in DOM021: DOM021_1.append('1') else:DOM021_1.append('0') if i in DOM022: DOM022_1.append('1') else:DOM022_1.append('0') if i in DOM023: DOM023_1.append('1') else:DOM023_1.append('0') if i in DOM024: DOM024_1.append('1') else:DOM024_1.append('0') if i in DOM025: DOM025_1.append('1') else:DOM025_1.append('0') if i in DOM026: DOM026_1.append('1') else:DOM026_1.append('0') if i in NLF001: NLF001_1.append(' | 1') else:NLF001_1.append(' | 0') if i in NLF002: NLF002_1.append('1') else:NLF002_1.append('0') if i in NLF005: NLF005_1.append('1') else:NLF005_1.append('0') if i in NLF006: NLF006_1.append('1') else:NLF006_1.append('0') if i in NLF007: NLF007_1.append('1') else:NLF007_1.append('0') if i in NLF008: NLF008_1.append('1') else:NLF008_1.append('0') if i in NLF009: NLF009_1.append('1') else:NLF009_1.append('0') if i in NLF010: NLF010_1.append('1') else:NLF010_1.append('0') if i in NLF011: NLF011_1.append('1') else:NLF011_1.append('0') if i in NLF012: NLF012_1.append('1') else:NLF012_1.append('0') if i in NLF013: NLF013_1.append('1') else:NLF013_1.append('0') if i in NLF014: NLF014_1.append('1') else:NLF014_1.append('0') if i in NLF015: NLF015_1.append('1') else:NLF015_1.append('0') if i in NLM001: NLM001_1.append('1') else:NLM001_1.append('0') if i in NLM002: NLM002_1.append('1') else:NLM002_1.append('0') if i in NLM003: NLM003_1.append('1') else:NLM003_1.append('0') if i in NLM004: NLM004_1.append('1') else:NLM004_1.append('0') if i in NLM005: NLM005_1.append('1') else:NLM005_1.append('0') if i in NLM006: NLM006_1.append('1') else:NLM006_1.append('0') if i in NLM007: NLM007_1.append('1') else:NLM007_1.append('0') if i in NLM008: NLM008_1.append('1') else:NLM008_1.append('0') if i in NLM009: NLM009_1.append('1') else:NLM009_1.append('0') if i in NLM010: NLM010_1.append('1') else:NLM010_1.append('0') if i in NLM015: NLM015_1.append('1') 
else:NLM015_1.append('0') if i in NLM016: NLM016_1.append('1') else:NLM016_1.append('0') if i in NLM017: NLM017_1.append('1') else:NLM017_1.append('0') if i in NLM021: NLM021_1.append('1') else:NLM021_1.append('0') if i in NLM022: NLM022_1.append('1') else:NLM022_1.append('0') if i in NLM023: NLM023_1.append('1') else:NLM023_1.append('0') if i in NLM024: NLM024_1.append('1') else:NLM024_1.append('0') if i in NLM025: NLM025_1.append('1') else:NLM025_1.append('0') if i in NLM026: NLM026_1.append('1') else:NLM026_1.append('0') if i in NLM027: NLM027_1.append('1') else:NLM027_1.append('0') if i in NLM028: NLM028_1.append('1') else:NLM028_1.append('0') if i in NLM029: NLM029_1.append('1') else:NLM029_1.append('0') if i in NLM031: NLM031_1.append('1') else:NLM031_1.append('0') if i in NLM032: NLM032_1.append('1') else:NLM032_1.append('0') if i in NOF001: NOF001_1.append('1') else:NOF001_1.append('0') if i in NOF002: NOF002_1.append('1') else:NOF002_1.append('0') if i in NOF004: NOF004_1.append('1') else:NOF004_1.append('0') if i in NOF005: NOF005_1.append('1') else:NOF005_1.append('0') if i in NOF006: NOF006_1.append('1') else:NOF006_1.append('0') if i in NOF007: NOF007_1.append('1') else:NOF007_1.append('0') if i in NOF008: NOF008_1.append('1') else:NOF008_1.append('0') if i in NOF009: NOF009_1.append('1') else:NOF009_1.append('0') if i in NOF010: NOF010_1.append('1') else:NOF010_1.append('0') if i in NOF011: NOF011_1.append('1') else:NOF011_1.append('0') if i in NOF012: NOF012_1.append('1') else:NOF012_1.append('0') if i in NOF013: NOF013_1.append('1') else:NOF013_1.append('0') if i in NOF014: NOF014_1.append('1') else:NOF014_1.append('0') if i in NOM001: NOM001_1.append('1') else:NOM001_1.append('0') if i in NOM002: NOM002_1.append('1') else:NOM002_1.append('0') if i in NOM004: NOM004_1.append('1') else:NOM004_1.append('0') if i in NOM005: NOM005_1.append('1') else:NOM005_1.append('0') if i in NOM007: NOM007_1.append('1') else:NOM007_1.append('0') if i in NOM008: 
NOM008_1.append('1') else:NOM008_1.append('0') if i in NOM009: NOM009_1.append('1') else:NOM009_1.append('0') if i in NOM010: NOM010_1.append('1') else:NOM010_1.append('0') if i in NOM012: NOM012_1.append('1') else:NOM012_1.append('0') if i in NOM013: NOM013_1.append('1') else:NOM013_1.append('0') if i in NOM015: NOM015_1.append('1') else:NOM015_1.append('0') if i in NOM016: NOM016_1.append('1') else:NOM016_1.append('0') if i in NOM017: NOM017_1.append('1') else:NOM017_1.append('0') if i in NOM018: NOM018_1.append('1') else:NOM018_1.append('0') if i in NOM019: NOM019_1.append('1') else:NOM019_1.append('0') if i in NOM020: NOM020_1.append('1') else:NOM020_1.append('0') if i in NOM022: NOM022_1.append('1') else:NOM022_1.append('0') if i in NOM023: NOM023_1.append('1') else:NOM023_1.append('0') if i in NOM025: NOM025_1.append('1') else:NOM025_1.append('0') if i in NOM026: NOM026_1.append('1') else:NOM026_1.append('0') if i in NOM027: NOM027_1.append('1') else:NOM027_1.append('0') if i in NOM028: NOM028_1.append('1') else:NOM028_1.append('0') if i in NOM029: NOM029_1.append('1') else:NOM029_1.append('0') ## print 'saving' zoo = zip(main_op_list, DLF002_1,DLF004_1,DLF005_1, DLF006_1,DLF007_1,DLF008_1,DLF009_1,DLF010_1,DLF012_1, DLF013_1,DLF014_1,DLM001_1,DLM002_1,DLM003_1,DLM004_1, DLM005_1,DLM006_1,DLM009_1,DLM011_1,DLM012_1,DLM018_1, DOF002_1,DOF003_1,DOF004_1,DOF006_1,DOF007_1,DOF008_1, DOF009_1,DOF010_1,DOF011_1,DOF012_1,DOF013_1,DOF014_1, DOM001_1,DOM003_1,DOM005_1,DOM008_1,DOM010_1,DOM012_1, DOM013_1,DOM014_1,DOM015_1,DOM016_1,DOM017_1,DOM018_1, DOM019_1,DOM020_1,DOM021_1,DOM022_1,DOM023_1,DOM024_1, DOM025_1,DOM026_1,NLF001_1,NLF002_1,NLF005_1,NLF006_1, NLF007_1,NLF008_1,NLF009_1,NLF010_1,NLF011_1,NLF012_1, NLF013_1,NLF014_1,NLF015_1,NLM001_1,NLM002_1,NLM003_1, NLM004_1,NLM005_1,NLM006_1,NLM007_1,NLM008_1,NLM009_1, NLM010_1,NLM015_1,NLM016_1,NLM017_1,NLM021_1,NLM022_1, NLM023_1,NLM024_1,NLM025_1,NLM026_1,NLM027_1,NLM028_1, 
NLM029_1,NLM031_1,NLM032_1,NOF001_1,NOF002_1,NOF004_1, NOF005_1,NOF006_1,NOF007_1,NOF008_1,NOF009_1,NOF010_1, NOF011_1,NOF012_1,NOF013_1,NOF014_1,NOM001_1,NOM002_1, NOM004_1,NOM005_1,NOM007_1,NO M008_1,NOM009_1,NOM010_1,NOM012_1,NOM013_1,NOM015_1, NOM016_1,NOM017_1,NOM018_1,NOM019_1,NOM020_1,NOM022_1, NOM023_1,NOM025_1,NOM026_1,NOM027_1,NOM028_1,NOM029_1) with open("test.tab", 'w+') as outfile: writer =csv.writer(outfile, delimiter = '\t', lineterminator = '\n') writer.writerow([' ','DLF2','DLF4','DLF5','DLF6', 'DLF7','DLF8','DLF9','DLF10','DLF12','DLF13','DLF14','DLM1', 'DLM2','DLM3','DLM4','DLM5','DLM6','DLM9','DLM11','DLM12',' DLM18','DOF2','DOF3','DOF4','DOF6','DOF7','DOF8','DOF9',' DOF10','DOF11','DOF12','DOF13','DOF04','DOM1','DOM3','DOM5', 'DOM8','DOM10','DOM12','DOM13','DOM14','DOM15','DOM16',' DOM17','DOM18','DOM19','DOM20','DOM21','DOM22','DOM23',' DOM24','DOM25','DOM26','NLF1','NLF2','NLF5','NLF6','NLF7',' NLF8','NLF9','NLF10','NLF11','NLF12','NLF13','NLF14','NLF15' ,'NLM1','NLM2','NLM3','NLM4','NLM5','NLM6','NLM7','NLM8',' NLM9','NLM10','NLM15','NLM16','NLM17','NLM21','NLM22',' NLM23','NLM24','NLM25','NLM26','NLM27','NLM28','NLM29',' NLM31','NLM32','NOF1','NOF2','NOF4','NOF5','NOF6','NOF7',' NOF8','NOF9','NOF10','NOF11','NOF12','NOF13','NOF14','NOM1', 'NOM2','NOM4','NOM5','NOM7','NOM8','NOM9','NOM10','NOM12',' NOM13','NOM15','NOM16','NOM17','NOM18','NOM19','NOM20',' NOM22','NOM23','NOM25','NOM26','NOM27','NOM28','NOM29']) writer.writerows(zoo) outfile.close() print 'done' end_time = time.time() elapsed = end_time-start_time print "Time elapsed.", elapsed Thanks Best Regards Syed Shujaat Ali Zaidi PhD Scholar (Bioinformatics) MOE Key Laboratory of Bioinformatics Bioinformatics Division, TNLIST & Department of Automation FIT 1-107, Tsinghua University, Beijing 100084, China Lecturer (Bioinformatics) Department of Bio Sciences COMSATS Institute of Information Technology Islamabad, Pakistan _______________________________________________ Tutor maillist - 
Tutor@python.org To unsubscribe or change subscription options: https://mail.python.org/mailman/listinfo/tutor _______________________________________________ Tutor maillist - Tutor@python.org To unsubscribe or change subscription options: https://mail.python.org/mailman/listinfo/tutor