Author: mes
Date: 2012-01-30 16:52:07 -0800 (Mon, 30 Jan 2012)
New Revision: 28162
Added:
core3/impl/trunk/psi-mi-impl/impl/src/test/java/org/cytoscape/psi_mi/internal/plugin/MITABLineTest.java
Modified:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java
Log:
significant updates to mitab line reader
Modified:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java
===================================================================
---
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java
2012-01-31 00:01:09 UTC (rev 28161)
+++
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java
2012-01-31 00:52:07 UTC (rev 28162)
@@ -7,17 +7,17 @@
// can be further separated by ':'
// These are the columns
//
-// 0 srcAlias:sourceRawId|srcAlias:srcAlias
-// 1 tgtAlias:targetRawId|tgtAlias:tgtAlias
-// 2 srcAlias:srcAlias|srcAlias:srcAlias
-// 3 tgtAlias:tgtAlias|tgtAlias:tgtAlias
-// 4 srcAlias:srcAlias|srcAlias:srcAlias
-// 5 tgtAlias:tgtAlias|tgtAlias:tgtAlias
-// 6 detectionMethod|detectionMethod
+// 0 srcDB:sourceRawId|srcDB:srcAlias
+// 1 tgtDB:targetRawId|tgtDB:tgtAlias
+// 2 srcDB:srcAlias|srcDB:srcAlias
+// 3 tgtDB:tgtAlias|tgtDB:tgtAlias
+// 4 srcDB:srcAlias|srcDB:srcAlias
+// 5 tgtDB:tgtAlias|tgtDB:tgtAlias
+// 6 detectionDB:detectionMethod|detectionDB:detectionMethod
// 7 authors|authors
// 8 publicationIDKey:publicationIDValue|publicationIDKey:publicationIDValue
-// 9 srcAttrName:srcTaxonName|XXXX:XXXX
-// 10 tgtAttrName:tgtTaxonName|XXXX:XXXX
+// 9 srcTaxonDB:srcTaxonName|srcTaxonDB:srcTaxonName
+// 10 tgtTaxonDB:tgtTaxonName|tgtTaxonDB:tgtTaxonName
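-// 11 interactionType|interactionType
-// 12 sourceDB|sourceDB
-// 13 interactionID|XXXX
+// 11 interactionTypeDB:interactionType|interactionTypeDB:interactionType
+// 12 sourceDB:sourceID|sourceDB:sourceID
+// 13 interactionDB:interactionID|interactionDB:interactionID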
@@ -28,260 +28,273 @@
+ // Delimiters used when tokenizing a MITAB line: ':' separates a database
+ // name from its value, '|' separates entries within a column, and a tab
+ // separates columns.
final char COLON = ':';
final char PIPE = '|';
+ // Must be a real tab character: column values legitimately contain spaces
+ // (e.g. "entrez gene/locuslink"), so a space cannot act as the separator.
final char TAB = '\t';
+ // Quoted values may contain the delimiters above; see nextString().
+ final char QUOTE = '"';
String sourceRawID = "";
String targetRawID = "";
- String srcAttrName = "";
- String srcTaxonName = "";
- String tgtAttrName = "";
- String tgtTaxonName = "";
- String interactionID = "";
- List<String> srcAliases = new ArrayList<String>(20);
- List<String> tgtAliases = new ArrayList<String>(20);
- List<String> authors = new ArrayList<String>(20);
- List<String> detectionMethods = new ArrayList<String>(20);
- List<String> publicationIDs = new ArrayList<String>(20);
- List<String> publicationValues = new ArrayList<String>(20);
- List<String> sourceDBs = new ArrayList<String>(20);
- List<String> interactionTypes = new ArrayList<String>(20);
- List<String> edgeScoreTypes = new ArrayList<String>(20);
- List<String> edgeScoreStrings = new ArrayList<String>(20);
+ List<String> srcAliases = new ArrayList<String>(10);
+ List<String> srcDBs = new ArrayList<String>(10);
- int colon = 0;
- int tab = 0;
- int pipe = 0;
+ List<String> tgtAliases = new ArrayList<String>(10);
+ List<String> tgtDBs = new ArrayList<String>(10);
+ List<String> authors = new ArrayList<String>(5);
+
+ List<String> detectionMethods = new ArrayList<String>(5);
+ List<String> detectionDBs = new ArrayList<String>(5);
+
+ List<String> publicationValues = new ArrayList<String>(5);
+ List<String> publicationDBs = new ArrayList<String>(5);
+
+ List<String> srcTaxonDBs = new ArrayList<String>(5);
+ List<String> srcTaxonIDs = new ArrayList<String>(5);
+
+ List<String> tgtTaxonDBs = new ArrayList<String>(5);
+ List<String> tgtTaxonIDs = new ArrayList<String>(5);
+
+ List<String> sourceIDs = new ArrayList<String>(5);
+ List<String> sourceDBs = new ArrayList<String>(5);
+
+ List<String> interactionTypes = new ArrayList<String>(5);
+ List<String> interactionTypeDBs = new ArrayList<String>(5);
+
+ List<String> edgeScoreTypes = new ArrayList<String>(5);
+ List<String> edgeScoreStrings = new ArrayList<String>(5);
+
+ List<String> interactionIDs = new ArrayList<String>(5);
+ List<String> interactionDBs = new ArrayList<String>(5);
+
+ private int colon = 0;
+ private int tab = 0;
+ private int pipe = 0;
+ private int begin = 0;
+ private int end = 0;
+
private void init() {
sourceRawID = "";
targetRawID = "";
- srcAttrName = "";
- srcTaxonName = "";
- tgtAttrName = "";
- tgtTaxonName = "";
- interactionID = "";
colon = 0;
tab = 0;
pipe = 0;
+ begin = 0;
+ end = 0;
srcAliases.clear();
tgtAliases.clear();
authors.clear();
detectionMethods.clear();
- publicationIDs.clear();
+ detectionDBs.clear();
+ publicationDBs.clear();
publicationValues.clear();
+ srcTaxonIDs.clear();
+ srcTaxonDBs.clear();
+ tgtTaxonIDs.clear();
+ tgtTaxonDBs.clear();
+ sourceIDs.clear();
sourceDBs.clear();
interactionTypes.clear();
+ interactionTypeDBs.clear();
edgeScoreTypes.clear();
edgeScoreStrings.clear();
+ interactionIDs.clear();
+ interactionDBs.clear();
}
- private int nextIndex(String s, int start) {
- colon = s.indexOf(COLON, start);
- pipe = s.indexOf(PIPE, start);
- tab = s.indexOf(TAB, start);
- return Math.min(colon, Math.min(pipe,tab));
- }
void readLine(String line) {
init();
- int begin = 0;
- int end = 0;
-
// column 0
- // get first source alias
- end = nextIndex(line,0);
- srcAliases.add(line.substring(begin,end));
- begin = end+1;
+ // get first source DB
+ srcDBs.add(nextString(line));
// get sourceRawID
- end = nextIndex(line,begin);
- sourceRawID = line.substring(begin,end);
- begin = end+1;
+ sourceRawID = nextString(line);
+ srcAliases.add(sourceRawID);
- // get any additional source aliases
- do {
- end = nextIndex(line,begin);
- srcAliases.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ // get any additional source aliases from col 0
+ addNextPairs("additional src aliases", srcDBs, srcAliases, line
);
// column 1
- // get first target alias
- end = nextIndex(line,begin);
- tgtAliases.add(line.substring(begin,end));
- begin = end+1;
+ // get first target db
+ tgtDBs.add(nextString(line));
// get targetRawID
- end = nextIndex(line,begin);
- targetRawID = line.substring(begin,end);
- begin = end+1;
+ targetRawID = nextString(line);
+ tgtAliases.add(targetRawID);
- // get any additional target aliases
- do {
- end = nextIndex(line,begin);
- tgtAliases.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ // get any additional target aliases from col 1
+ addNextPairs("additional tgt aliases", tgtDBs, tgtAliases,
line);
// column 2
// get any additional source aliases
- do {
- end = nextIndex(line,begin);
- srcAliases.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("col 2 src", srcDBs, srcAliases, line );
// column 3
// get any additional target aliases
- do {
- end = nextIndex(line,begin);
- tgtAliases.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("col 3 tgt", tgtDBs, tgtAliases, line);
// column 4
// get any additional source aliases
- do {
- end = nextIndex(line,begin);
- srcAliases.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("col 4 src", srcDBs, srcAliases, line );
// column 5
// get any additional target aliases
- do {
- end = nextIndex(line,begin);
- tgtAliases.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("col 5 tgt", tgtDBs, tgtAliases, line);
// column 6
// get any detection methods
- do {
- end = nextIndex(line,begin);
- detectionMethods.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("detection", detectionDBs, detectionMethods, line);
// column 7
// get any authors
- do {
- end = nextIndex(line,begin);
- authors.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextValues("authors",authors,line);
// column 8
// get any additional publications
- do {
- end = nextIndex(line,begin);
- publicationIDs.add(line.substring(begin,end));
- begin = end+1;
- end = nextIndex(line,begin);
- publicationValues.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("publications", publicationDBs, publicationValues,
line);
// column 9
// get source taxon
- end = nextIndex(line,begin);
- srcAttrName = line.substring(begin,end);
- begin = end+1;
+ addNextPairs("src taxon", srcTaxonDBs, srcTaxonIDs, line);
- end = nextIndex(line,begin);
- srcTaxonName = line.substring(begin,end);
- begin = end+1;
-
- // skip anything else in this column
- do {
- end = nextIndex(line,begin);
- begin = end+1;
- } while ( end != tab );
-
// column 10
// get target taxon
- end = nextIndex(line,begin);
- tgtAttrName = line.substring(begin,end);
- begin = end+1;
+ addNextPairs("tgt taxon", tgtTaxonDBs, tgtTaxonIDs, line);
- end = nextIndex(line,begin);
- tgtTaxonName = line.substring(begin,end);
- begin = end+1;
-
- // skip anything else in this column
- do {
- end = nextIndex(line,begin);
- begin = end+1;
- } while ( end != tab );
-
// column 11
// get any interaction types
- do {
- end = nextIndex(line,begin);
- interactionTypes.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("interaction", interactionTypeDBs,
interactionTypes, line);
// column 12
// get any source databases
- do {
- end = nextIndex(line,begin);
- sourceDBs.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("source", sourceDBs,sourceIDs,line);
// column 13
// get interaction ID
- end = nextIndex(line,begin);
- interactionID = line.substring(begin,end);
- begin = end+1;
+ addNextPairs("interaction IDs", interactionDBs, interactionIDs,
line );
- // skip anything else in this column
- do {
- end = nextIndex(line,begin);
- begin = end+1;
- } while ( end != tab );
-
// column 14
// get edge scores
- do {
- end = nextIndex(line,begin);
- edgeScoreTypes.add(line.substring(begin,end));
- begin = end+1;
- end = nextIndex(line,begin);
- edgeScoreStrings.add(line.substring(begin,end));
- begin = end+1;
- } while ( end != tab );
+ addNextPairs("edge scores", edgeScoreTypes, edgeScoreStrings,
line);
}
+ // just for debugging
public void print() {
System.out.println("sourceRawID: " + sourceRawID);
System.out.println("targetRawID: " + targetRawID);
- System.out.println("srcAttrName: " + srcAttrName);
- System.out.println("srcTaxonName: " + srcTaxonName);
- System.out.println("tgtAttrName: " + tgtAttrName);
- System.out.println("tgtTaxonName: " + tgtTaxonName);
- System.out.println("interactionID: " + interactionID);
printList("srcAliases", srcAliases);
printList("tgtAliases", tgtAliases);
+ printList("detectionDBs", detectionDBs);
+ printList("detectionMethods", detectionMethods);
printList("authors", authors);
- printList("detectionMethods", detectionMethods);
- printList("publicationIDs", publicationIDs);
+ printList("publicationDBs", publicationDBs);
printList("publicationValues", publicationValues);
printList("sourceDBs", sourceDBs);
+ printList("sourceIDs", sourceIDs);
printList("interactionTypes", interactionTypes);
+ printList("interactionTypeDBs", interactionTypeDBs);
+ printList("interactionIDs", interactionIDs);
+ printList("interactionDBs", interactionDBs);
printList("edgeScoreTypes", edgeScoreTypes);
printList("edgeScoreStrings", edgeScoreStrings);
System.out.println();
System.out.println();
}
- private void printList(String name, List<String> vals) {
+ // just for debugging
+ public void printList(String name, List<String> vals) {
System.out.print(name + ": ");
for ( String s : vals )
- System.out.print(s + ", ");
+ System.out.print("'" + s + "', ");
System.out.println();
}
+
+ private String nextString(String line) {
+ end = nextIndex(line,begin);
+ if ( (begin > end) || (begin > line.length() - 1))
+ return "";
+
+ String ret = line.substring(begin,end);
+
+ // This is an attempt to handle quoted strings, which may
+ // include our tokenizing characters! Basically, if
+ // we see a quote, make sure we get a close quote too!
+ int openQuote = ret.indexOf(QUOTE);
+ if ( openQuote >= 0 ) {
+ int closeQuote = ret.indexOf(QUOTE,openQuote+1);
+ if ( closeQuote < 0 ) {
+ end = nextIndex(line,end+1);
+ ret = line.substring(begin,end);
+ }
+ }
+
+ begin = end+1;
+ return ret;
+ }
+
+    /**
+     * Reads the plain (non "db:value") tokens of the current column and
+     * appends each one to {@code values}. Reading stops once the token just
+     * read ended at the position recorded in {@code tab}, i.e. at the end
+     * of the column.
+     *
+     * @param desc   label for the column; not used by the parsing itself
+     * @param values list that receives the tokens
+     * @param line   the MITAB line being parsed
+     */
+    private void addNextValues(String desc, List<String> values, String line) {
+        do {
+            // Append to the list supplied by the caller rather than to the
+            // authors field, so the helper works for any column.
+            values.add(nextString(line));
+        } while ( end != tab );
+    }
+
+ private void addNextPairs(String desc, List<String> dbs, List<String>
values, String line) {
+
+ //System.out.println("starting: " + desc);
+ //int peekEnd = peekNextIndex(line,begin);
+ //if ( (begin <= peekEnd) && (begin >= 0) && (peekEnd >= 0) )
+ // System.out.println(" for: " +
line.substring(begin,peekEnd));
+ //else
+ // System.out.println(" weird begin: " + begin + " end: "
+ peekEnd);
+
+
+ do {
+ String db = nextString(line);
+ //System.out.println(" next db string: '" + db +
"'");
+
+ // make sure the first column is valid before continuing
+ if ( db.equals("") || db.equals("-") ) {
+ //System.out.println(" got invalid col: "
+ db);
+ return;
+ }
+ dbs.add(db);
+
+ String val = nextString(line);
+ //System.out.println(" next val string: '" + val +
"'");
+ values.add(val);
+ } while ( end != tab );
+ }
+
+ private int nextIndex(String s, int start) {
+ colon = s.indexOf(COLON, start);
+ if ( colon < 0 ) colon = s.length() - 1;
+
+ pipe = s.indexOf(PIPE, start);
+ if ( pipe < 0 ) pipe = s.length() - 1;
+
+ tab = s.indexOf(TAB, start);
+ if ( tab < 0 ) tab = s.length() - 1;
+
+ int ind = Math.min(colon, Math.min(pipe,tab));
+
+ return ind;
+ }
+
+ // just for debugging!
+ private int peekNextIndex(String s, int start) {
+ int x, y, z = 0;
+ x = s.indexOf(COLON, start);
+ if ( x < 0 ) x = s.length() - 1;
+
+ y = s.indexOf(PIPE, start);
+ if ( y < 0 ) y = s.length() - 1;
+
+ z = s.indexOf(TAB, start);
+ if ( z < 0 ) z = s.length() - 1;
+
+ int ind = Math.min(x, Math.min(y,z));
+ return ind;
+ }
}
Added:
core3/impl/trunk/psi-mi-impl/impl/src/test/java/org/cytoscape/psi_mi/internal/plugin/MITABLineTest.java
===================================================================
---
core3/impl/trunk/psi-mi-impl/impl/src/test/java/org/cytoscape/psi_mi/internal/plugin/MITABLineTest.java
(rev 0)
+++
core3/impl/trunk/psi-mi-impl/impl/src/test/java/org/cytoscape/psi_mi/internal/plugin/MITABLineTest.java
2012-01-31 00:52:07 UTC (rev 28162)
@@ -0,0 +1,116 @@
+package org.cytoscape.psi_mi.internal.plugin;
+
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.BufferedReader;
+import java.util.Properties;
+
+import static org.junit.Assert.*;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import static org.mockito.Mockito.when;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+public class MITABLineTest {
+
+ File file;
+ BufferedReader is;
+
+ @Before
+ public void setUp() throws Exception {
+ file = new
File("src/test/resources/testData/BIOGRID-ORGANISM-Bos_taurus-3.1.74.mitab");
+ is = new BufferedReader(new FileReader(file));
+ }
+
+ @Test
+// #ID Interactor A ID Interactor B Alt IDs Interactor A Alt IDs
Interactor B Aliases Interactor A Aliases Interactor B Interaction
Detection Method Publication 1st Author Publication Identifiers Taxid
Interactor A Taxid Interactor B Interaction Types Source Database
Interaction Identifiers Confidence Values
+// entrez gene/locuslink:280906|GRID:158296 entrez
gene/locuslink:281119|GRID:158481 entrez gene/locuslink:RB1 entrez
gene/locuslink:DNMT1|entrez gene/locuslink:BOS_7601 - entrez
gene/locuslink:DNMT(gene name synonym) psi-mi:"MI:0004"(affinity
chromatography technology) "Robertson KD (2000)" pubmed:10888886
taxid:9913 taxid:9913 psi-mi:"MI:0915"(physical association)
psi-mi:"MI:0463"(GRID) GRID:261841 -
+ public void testMITABLine() throws Exception {
+ MITABLine mline = new MITABLine();
+
+ String line;
+
+ while ((line = is.readLine()) != null) {
+ if ( line.startsWith("#") )
+ continue;
+
+ mline.readLine(line);
+
+ assertEquals("280906",mline.sourceRawID);
+ assertEquals("281119",mline.targetRawID);
+ assertTrue(mline.srcAliases.contains("158296"));
+ assertTrue(mline.srcAliases.contains("RB1"));
+ assertTrue(mline.tgtAliases.contains("158481"));
+ assertTrue(mline.tgtAliases.contains("DNMT1"));
+ assertTrue(mline.tgtAliases.contains("BOS_7601"));
+ assertEquals(1,mline.detectionMethods.size());
+ assertEquals(1,mline.detectionDBs.size());
+ assertTrue(mline.authors.contains("\"Robertson KD
(2000)\""));
+ assertEquals("pubmed",mline.publicationDBs.get(0));
+ assertEquals("10888886",mline.publicationValues.get(0));
+ assertEquals("9913",mline.srcTaxonIDs.get(0));
+ assertEquals("9913",mline.tgtTaxonIDs.get(0));
+ assertEquals("\"MI:0915\"(physical
association)",mline.interactionTypes.get(0));
+
assertEquals("\"MI:0463\"(GRID)",mline.sourceIDs.get(0));
+ assertEquals("261841",mline.interactionIDs.get(0));
+ assertEquals(0,mline.edgeScoreStrings.size());
+
+ break;
+ }
+
+ }
+
+
+ @Test
+// #ID Interactor A ID Interactor B Alt IDs Interactor A Alt IDs
Interactor B Aliases Interactor A Aliases Interactor B Interaction
Detection Method Publication 1st Author Publication Identifiers Taxid
Interactor A Taxid Interactor B Interaction Types Source Database
Interaction Identifiers Confidence Values
+//entrez gene/locuslink:326601|GRID:160074 entrez
gene/locuslink:819210|GRID:4545 entrez gene/locuslink:H3F3A|entrez
gene/locuslink:BOS_15646 entrez gene/locuslink:BRM|entrez
gene/locuslink:At2g46020 entrez gene/locuslink:H3F3B(gene name synonym)
entrez gene/locuslink:ARABIDOPSIS THALIANA BRAHMA(gene name synonym)|entrez
gene/locuslink:T3F17.33(gene name synonym)|entrez gene/locuslink:CHA2(gene name
synonym)|entrez gene/locuslink:CHROMATIN REMODELING 2(gene name synonym)|entrez
gene/locuslink:ATBRM(gene name synonym)|entrez gene/locuslink:BRAHMA(gene name
synonym)|entrez gene/locuslink:CHR2(gene name synonym) psi-mi:"MI:0047"(far
western blotting) "Farrona S (2007)" pubmed:17825834 taxid:9913 taxid:3702
psi-mi:"MI:0407"(direct interaction) psi-mi:"MI:0463"(GRID) GRID:271838 -
+ public void testMITABLine3() throws Exception {
+ MITABLine mline = new MITABLine();
+
+ String line;
+
+ int lineNum = 0;
+ while ((line = is.readLine()) != null) {
+ if ( line.startsWith("#") )
+ continue;
+ if ( lineNum++ < 3 )
+ continue;
+ mline.readLine(line);
+
+ assertEquals("326601",mline.sourceRawID);
+ assertEquals("819210",mline.targetRawID);
+ assertTrue(mline.srcAliases.contains("160074"));
+ assertTrue(mline.srcAliases.contains("H3F3A"));
+ assertTrue(mline.srcAliases.contains("BOS_15646"));
+ assertTrue(mline.srcAliases.contains("H3F3B(gene name
synonym)"));
+ assertTrue(mline.tgtAliases.contains("4545"));
+ assertTrue(mline.tgtAliases.contains("BRM"));
+ assertTrue(mline.tgtAliases.contains("At2g46020"));
+ assertTrue(mline.tgtAliases.contains("ARABIDOPSIS
THALIANA BRAHMA(gene name synonym)"));
+ assertTrue(mline.tgtAliases.contains("T3F17.33(gene
name synonym)"));
+ assertTrue(mline.tgtAliases.contains("CHA2(gene name
synonym)"));
+ assertTrue(mline.tgtAliases.contains("CHROMATIN
REMODELING 2(gene name synonym)"));
+ assertTrue(mline.tgtAliases.contains("ATBRM(gene name
synonym)"));
+ assertTrue(mline.tgtAliases.contains("BRAHMA(gene name
synonym)"));
+ assertTrue(mline.tgtAliases.contains("CHR2(gene name
synonym)"));
+ assertEquals("\"MI:0047\"(far western
blotting)",mline.detectionMethods.get(0));
+ assertEquals(1,mline.detectionDBs.size());
+ assertTrue(mline.authors.contains("\"Farrona S
(2007)\""));
+ assertEquals("pubmed",mline.publicationDBs.get(0));
+ assertEquals("17825834",mline.publicationValues.get(0));
+ assertEquals("9913",mline.srcTaxonIDs.get(0));
+ assertEquals("3702",mline.tgtTaxonIDs.get(0));
+ assertEquals("\"MI:0407\"(direct
interaction)",mline.interactionTypes.get(0));
+
assertEquals("\"MI:0463\"(GRID)",mline.sourceIDs.get(0));
+ assertEquals("271838",mline.interactionIDs.get(0));
+ assertEquals(0,mline.edgeScoreStrings.size());
+
+ break;
+ }
+ }
+}
--
You received this message because you are subscribed to the Google Groups
"cytoscape-cvs" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/cytoscape-cvs?hl=en.