Hi,
Try this:
example1<-data.frame(V1=c("rs4685","rs4685","rs788018","rs788023"),V2=c("2:198257795","2:198257795","2:198265526","2:198283305"))
example2<-data.frame(V1=c("rs4685","rs4675","rs788018","rs788023"),V2=c("2:198257795","2:198258795","2:198265526","2:198283305"))
subset(example2,!(V1 %in% example1$V1))
V1 V2
2 rs4675 2:198258795
A.K.

----- Original Message -----
From: nathalie <n...@sanger.ac.uk>
To: r-help@r-project.org
Cc:
Sent: Monday, June 25, 2012 12:37 PM
Subject: [R] setdiff datframes

hi,
I have 2 files, example1 and example2, and would like to know what is in example2 and not in example1 (attached).
V1 contains data, possibly duplicated, which I am using as identifiers.
I used setdiff(example2$V1,example1$V1) to find the identifiers which are specific to example2:
[1] "rs2276598" "rs17253672"
I am looking for a way to get an output with all columns (V1 to V14) for these 2 identifiers....
thanks for any suggestions

format
example1
V1 V2 V3 V4 V5 V6
1 rs4685 2:198257795 C ENSG00000115524 ENST00000424674 Transcript
2 rs4685 2:198257795 C ENSG00000115524 ENST00000335508 Transcript
3 rs788018 2:198265526 G ENSG00000115524 ENST00000335508 Transcript
4 rs788023 2:198283305 C ENSG00000115524 ENST00000335508 Transcript
5 rs41284843 2:25536827 A ENSG00000119772 ENST00000406659 Transcript
6 rs41284843 2:25536827 A ENSG00000119772 ENST00000321117 Transcript
7 rs41284843 2:25536827 A ENSG00000119772 ENST00000264709 Transcript
8 rs41284843 2:25536827 A ENSG00000119772 ENST00000380756 Transcript
9 rs3729680 3:178927410 G ENSG00000121879 ENST00000263967 Transcript
10 rs61744960 4:106156163 A ENSG00000168769 ENST00000305737 Transcript
11 rs61744960 4:106156163 A ENSG00000168769 ENST00000413648 Transcript
12 rs61744960 4:106156163 A ENSG00000168769 ENST00000540549 Transcript
13 rs61744960 4:106156163 A ENSG00000168769 ENST00000545826 Transcript
14 rs61744960 4:106156163 A ENSG00000168769 ENST00000380013 Transcript
15 rs61744960 4:106156163 A ENSG00000168769 ENST00000535110 Transcript
16 rs61744960 4:106156163 A ENSG00000168769 ENST00000394764 Transcript
17 rs61744960 4:106156163 A ENSG00000168769 ENST00000513237 Transcript
18 rs61744960 4:106156163 A ENSG00000168769 ENST00000265149 Transcript
19 rs2454206 4:106196951 G ENSG00000168769 ENST00000540549 Transcript
20
rs2454206 4:106196951 G ENSG00000168769 ENST00000513237 Transcript V7 V8 V9 V10 V11 V12 V13 1 SYNONYMOUS_CODING 704 705 235 V gtA/gtG rs4685 2 SYNONYMOUS_CODING 3749 3657 1219 V gtA/gtG rs4685 3 SYNONYMOUS_CODING 2723 2631 877 G ggT/ggC rs788018 4 SYNONYMOUS_CODING 515 423 141 K aaA/aaG rs788023 5 SYNONYMOUS_CODING 365 27 9 P ccC/ccT rs41284843 6 SYNONYMOUS_CODING 264 27 9 P ccC/ccT rs41284843 7 SYNONYMOUS_CODING 365 27 9 P ccC/ccT rs41284843 8 NMD_TRANSCRIPT,SYNONYMOUS_CODING 264 27 9 P ccC/ccT rs41284843 9 NON_SYNONYMOUS_CODING 1330 1173 391 I/M atA/atG rs3729680 10 NON_SYNONYMOUS_CODING 1468 1064 355 G/D gGt/gAt rs61744960 11 NON_SYNONYMOUS_CODING 1204 1064 355 G/D gGt/gAt rs61744960 12 NON_SYNONYMOUS_CODING 1924 1064 355 G/D gGt/gAt rs61744960 13 NON_SYNONYMOUS_CODING 1924 1064 355 G/D gGt/gAt rs61744960 14 NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt rs61744960 15 NON_SYNONYMOUS_CODING 1167 1064 355 G/D gGt/gAt rs61744960 16 NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt rs61744960 17 NON_SYNONYMOUS_CODING 1924 1127 376 G/D gGt/gAt rs61744960 18 NMD_TRANSCRIPT,NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt rs61744960 19 NON_SYNONYMOUS_CODING 6144 5284 1762 I/V Ata/Gta rs2454206 20 NON_SYNONYMOUS_CODING 6144 5347 1783 I/V Ata/Gta rs2454206 V14 1 ENSP=ENSP00000409435;HGNC=SF3B1 2 ENSP=ENSP00000335321;HGNC=SF3B1 3 ENSP=ENSP00000335321;HGNC=SF3B1 4 ENSP=ENSP00000335321;HGNC=SF3B1 5 ENSP=ENSP00000384852;HGNC=DNMT3A 6 ENSP=ENSP00000324375;HGNC=DNMT3A 7 ENSP=ENSP00000264709;HGNC=DNMT3A 8 ENSP=ENSP00000370132;HGNC=DNMT3A 9 ENSP=ENSP00000263967;PolyPhen=benign(0.019);SIFT=tolerated(0.13);HGNC=PIK3CA 10 ENSP=ENSP00000306705;PolyPhen=probably_damaging(0.983);SIFT=deleterious(0.01);HGNC=TET2 11 ENSP=ENSP00000391448;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2 12 ENSP=ENSP00000442788;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2 13 ENSP=ENSP00000442867;PolyPhen=probably_damaging(0.952);SIFT=deleterious(0.01);HGNC=TET2 14 
ENSP=ENSP00000369351;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2 15 ENSP=ENSP00000438851;PolyPhen=probably_damaging(0.998);SIFT=deleterious(0.01);HGNC=TET2 16 ENSP=ENSP00000378245;PolyPhen=probably_damaging(0.983);SIFT=deleterious(0.01);HGNC=TET2 17 ENSP=ENSP00000425443;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2 18 ENSP=ENSP00000265149;PolyPhen=probably_damaging(0.952);SIFT=deleterious(0.01);HGNC=TET2 19 ENSP=ENSP00000442788;PolyPhen=benign(0.029);SIFT=tolerated(0.15);HGNC=TET2 20 ENSP=ENSP00000425443;PolyPhen=benign(0.029);SIFT=tolerated(0.15);HGNC=TET2 > example2 V1 V2 V3 V4 V5 V6 1 rs4685 2:198257795 C ENSG00000115524 ENST00000424674 Transcript 2 rs4685 2:198257795 C ENSG00000115524 ENST00000335508 Transcript 3 rs788018 2:198265526 G ENSG00000115524 ENST00000335508 Transcript 4 rs788023 2:198283305 C ENSG00000115524 ENST00000335508 Transcript 5 rs2276598 2:25469502 T ENSG00000119772 ENST00000321117 Transcript 6 rs2276598 2:25469502 T ENSG00000119772 ENST00000380756 Transcript 7 rs2276598 2:25469502 T ENSG00000119772 ENST00000402667 Transcript 8 rs2276598 2:25469502 T ENSG00000119772 ENST00000380746 Transcript 9 rs2276598 2:25469502 T ENSG00000119772 ENST00000264709 Transcript 10 rs3729680 3:178927410 G ENSG00000121879 ENST00000263967 Transcript 11 rs61744960 4:106156163 A ENSG00000168769 ENST00000305737 Transcript 12 rs61744960 4:106156163 A ENSG00000168769 ENST00000413648 Transcript 13 rs61744960 4:106156163 A ENSG00000168769 ENST00000540549 Transcript 14 rs61744960 4:106156163 A ENSG00000168769 ENST00000545826 Transcript 15 rs61744960 4:106156163 A ENSG00000168769 ENST00000380013 Transcript 16 rs61744960 4:106156163 A ENSG00000168769 ENST00000535110 Transcript 17 rs61744960 4:106156163 A ENSG00000168769 ENST00000394764 Transcript 18 rs61744960 4:106156163 A ENSG00000168769 ENST00000513237 Transcript 19 rs61744960 4:106156163 A ENSG00000168769 ENST00000265149 Transcript 20 rs17253672 4:106156187 T ENSG00000168769 
ENST00000305737 Transcript V7 V8 V9 V10 V11 V12 V13 1 SYNONYMOUS_CODING 704 705 235 V gtA/gtG rs4685 2 SYNONYMOUS_CODING 3749 3657 1219 V gtA/gtG rs4685 3 SYNONYMOUS_CODING 2723 2631 877 G ggT/ggC rs788018 4 SYNONYMOUS_CODING 515 423 141 K aaA/aaG rs788023 5 SYNONYMOUS_CODING 1503 1266 422 L ctG/ctA rs2276598 6 NMD_TRANSCRIPT,SYNONYMOUS_CODING 1503 1266 422 L ctG/ctA rs2276598 7 SYNONYMOUS_CODING 745 597 199 L ctG/ctA rs2276598 8 SYNONYMOUS_CODING 813 699 233 L ctG/ctA rs2276598 9 SYNONYMOUS_CODING 1604 1266 422 L ctG/ctA rs2276598 10 NON_SYNONYMOUS_CODING 1330 1173 391 I/M atA/atG rs3729680 11 NON_SYNONYMOUS_CODING 1468 1064 355 G/D gGt/gAt rs61744960 12 NON_SYNONYMOUS_CODING 1204 1064 355 G/D gGt/gAt rs61744960 13 NON_SYNONYMOUS_CODING 1924 1064 355 G/D gGt/gAt rs61744960 14 NON_SYNONYMOUS_CODING 1924 1064 355 G/D gGt/gAt rs61744960 15 NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt rs61744960 16 NON_SYNONYMOUS_CODING 1167 1064 355 G/D gGt/gAt rs61744960 17 NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt rs61744960 18 NON_SYNONYMOUS_CODING 1924 1127 376 G/D gGt/gAt rs61744960 19 NMD_TRANSCRIPT,NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt rs61744960 20 NON_SYNONYMOUS_CODING 1492 1088 363 P/L cCt/cTt rs17253672 V14 1 ENSP=ENSP00000409435;HGNC=SF3B1 2 ENSP=ENSP00000335321;HGNC=SF3B1 3 ENSP=ENSP00000335321;HGNC=SF3B1 4 ENSP=ENSP00000335321;HGNC=SF3B1 5 ENSP=ENSP00000324375;HGNC=DNMT3A 6 ENSP=ENSP00000370132;HGNC=DNMT3A 7 ENSP=ENSP00000384237;HGNC=DNMT3A 8 ENSP=ENSP00000370122;HGNC=DNMT3A 9 ENSP=ENSP00000264709;HGNC=DNMT3A 10 ENSP=ENSP00000263967;PolyPhen=benign(0.019);SIFT=tolerated(0.13);HGNC=PIK3CA 11 ENSP=ENSP00000306705;PolyPhen=probably_damaging(0.983);SIFT=deleterious(0.01);HGNC=TET2 12 ENSP=ENSP00000391448;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2 13 ENSP=ENSP00000442788;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2 14 ENSP=ENSP00000442867;PolyPhen=probably_damaging(0.952);SIFT=deleterious(0.01);HGNC=TET2 15 
ENSP=ENSP00000369351;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2 16 ENSP=ENSP00000438851;PolyPhen=probably_damaging(0.998);SIFT=deleterious(0.01);HGNC=TET2 17 ENSP=ENSP00000378245;PolyPhen=probably_damaging(0.983);SIFT=deleterious(0.01);HGNC=TET2 18 ENSP=ENSP00000425443;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2 19 ENSP=ENSP00000265149;PolyPhen=probably_damaging(0.952);SIFT=deleterious(0.01);HGNC=TET2 20 ENSP=ENSP00000306705;PolyPhen=possibly_damaging(0.602);SIFT=deleterious(0);HGNC=TET2 -- The Wellcome Trust Sanger Institute is operated by Genome Research Limited, a charity registered in England with number 1021457 and a company registered in England with number 2742969, whose registered office is 215 Euston Road, London, NW1 2BE. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.