Revision: 19635
http://sourceforge.net/p/gate/code/19635
Author: markagreenwood
Date: 2016-10-05 09:52:33 +0000 (Wed, 05 Oct 2016)
Log Message:
-----------
fixed the other XML test so it still works but doesn't rely on ANNIE being
available at build time
Added Paths:
-----------
gate/branches/sawdust2/gate-core/src/test/java/gate/xml/TestXml.java
Removed Paths:
-------------
gate/branches/sawdust2/gate-core/src/test/disabled/gate/xml/TestXml.java
Deleted:
gate/branches/sawdust2/gate-core/src/test/disabled/gate/xml/TestXml.java
===================================================================
--- gate/branches/sawdust2/gate-core/src/test/disabled/gate/xml/TestXml.java
2016-10-05 09:08:51 UTC (rev 19634)
+++ gate/branches/sawdust2/gate-core/src/test/disabled/gate/xml/TestXml.java
2016-10-05 09:52:33 UTC (rev 19635)
@@ -1,463 +0,0 @@
-/*
- * TestXml.java
- *
- * Copyright (c) 1995-2012, The University of Sheffield. See the file
- * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
- *
- * This file is part of GATE (see http://gate.ac.uk/), and is free
- * software, licenced under the GNU Library General Public License,
- * Version 2, June 1991 (in the distribution as file licence.html,
- * and also available at http://gate.ac.uk/gate/licence.html).
- *
- * Cristian URSU, 8/May/2000
- *
- * $Id$
- */
-
-package gate.xml;
-
-import gate.Annotation;
-import gate.AnnotationSet;
-import gate.Corpus;
-import gate.Document;
-import gate.DocumentFormat;
-import gate.Factory;
-import gate.FeatureMap;
-import gate.Gate;
-import gate.GateConstants;
-import gate.corpora.DocumentImpl;
-import gate.corpora.TestDocument;
-import gate.creole.ANNIEConstants;
-import gate.creole.ConditionalSerialAnalyserController;
-import gate.util.Files;
-import gate.util.persistence.PersistenceManager;
-
-import java.io.File;
-import java.net.URL;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-
-//import org.w3c.www.mime.*;
-
-
-/** Test class for XML facilities
- *
- */
-public class TestXml extends TestCase
-{
- /** The encoding used in our tests*/
- private static String workingEncoding="UTF-8";
-
- /** Construction */
- public TestXml(String name) { super(name); }
-
- /** Fixture set up */
- @Override
- public void setUp() {
- } // setUp
-
- public void testGateDocumentToAndFromXmlWithDifferentKindOfFormats()
- throws
Exception{
- List<URL> urlList = new LinkedList<URL>();
- List<String> urlDescription = new LinkedList<String>();
- URL url = null;
-
- url = new URL(TestDocument.getTestServerName()+"tests/xml/xces.xml");
- assertTrue("Coudn't create a URL object for tests/xml/xces.xml ", url !=
null);
- urlList.add(url);
- urlDescription.add(" an XML document ");
-
- url = new URL(TestDocument.getTestServerName()+"tests/xml/Sentence.xml");
- assertTrue("Coudn't create a URL object for tests/xml/Sentence.xml",
- url != null);
- urlList.add(url);
- urlDescription.add(" an XML document ");
-
- url = new URL(TestDocument.getTestServerName()+"tests/html/test1.htm");
- assertTrue("Coudn't create a URL object for tests/html/test.htm",url !=
null);
- urlList.add(url);
- urlDescription.add(" an HTML document ");
-
- url = new URL(TestDocument.getTestServerName()+"tests/email/test2.eml");
- assertTrue("Coudn't create a URL object for defg ",url != null);
- urlList.add(url);
- urlDescription.add(" an EMAIL document ");
-
- Iterator<URL> iter = urlList.iterator();
- Iterator<String> descrIter = urlDescription.iterator();
- while(iter.hasNext()){
- runCompleteTestWithAFormat(iter.next(), descrIter.next());
- }// End While
-
-
- }// testGateDocumentToAndFromXmlWithDifferentKindOfFormats
-
- private void runCompleteTestWithAFormat(URL url, String urlDescription)
- throws Exception{
- // Load the xml Key Document and unpack it
- gate.Document keyDocument = null;
-
- FeatureMap params = Factory.newFeatureMap();
- params.put(Document.DOCUMENT_URL_PARAMETER_NAME, url);
- params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
- keyDocument = (Document)Factory.createResource("gate.corpora.DocumentImpl",
- params);
-
- assertTrue("Coudn't create a GATE document instance for " +
- url.toString() +
- " Can't continue." , keyDocument != null);
-
- gate.DocumentFormat keyDocFormat = null;
- keyDocFormat = gate.DocumentFormat.getDocumentFormat(
- keyDocument, keyDocument.getSourceUrl()
- );
-
- assertTrue("Fail to recognize " +
- url.toString() +
- " as being " + urlDescription + " !", keyDocFormat != null);
-
- // Unpack the markup
- keyDocFormat.unpackMarkup(keyDocument);
- // Verfy if all annotations from the default annotation set are consistent
- gate.corpora.TestDocument.verifyNodeIdConsistency(keyDocument);
-
- // Verifies if the maximum annotation ID on the GATE doc is less than the
- // Annotation ID generator of the document.
- verifyAnnotationIDGenerator(keyDocument);
-
- // Save the size of the document and the number of annotations
- long keyDocumentSize = keyDocument.getContent().size().longValue();
- int keyDocumentAnnotationSetSize = keyDocument.getAnnotations().size();
-
-
- // Export the Gate document called keyDocument as XML, into a temp file,
- // using the working encoding
- File xmlFile = null;
- xmlFile = Files.writeTempFile(keyDocument.toXml(), workingEncoding );
- assertTrue("The temp GATE XML file is null. Can't continue.",xmlFile !=
null);
-
- // Load the XML Gate document form the tmp file into memory
- gate.Document gateDoc = null;
- gateDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(),
workingEncoding);
-
- assertTrue("Coudn't create a GATE document instance for " +
- xmlFile.toURI().toURL().toString() +
- " Can't continue." , gateDoc != null);
-
- gate.DocumentFormat gateDocFormat = null;
- gateDocFormat =
- DocumentFormat.getDocumentFormat(gateDoc,gateDoc.getSourceUrl());
-
- assertTrue("Fail to recognize " +
- xmlFile.toURI().toURL().toString() +
- " as being a GATE XML document !", gateDocFormat != null);
-
- gateDocFormat.unpackMarkup(gateDoc);
- // Verfy if all annotations from the default annotation set are consistent
- gate.corpora.TestDocument.verifyNodeIdConsistency(gateDoc);
-
- // Save the size of the document snd the number of annotations
- long gateDocSize = keyDocument.getContent().size().longValue();
- int gateDocAnnotationSetSize = keyDocument.getAnnotations().size();
-
- assertTrue("Exporting as GATE XML resulted in document content size lost."
+
- " Something went wrong.", keyDocumentSize == gateDocSize);
-
- assertTrue("Exporting as GATE XML resulted in annotation lost." +
- " No. of annotations missing = " +
- Math.abs(keyDocumentAnnotationSetSize - gateDocAnnotationSetSize),
- keyDocumentAnnotationSetSize == gateDocAnnotationSetSize);
-
- // Verifies if the maximum annotation ID on the GATE doc is less than the
- // Annotation ID generator of the document.
- verifyAnnotationIDGenerator(gateDoc);
-
- //Don't need tmp Gate XML file.
- xmlFile.delete();
- }//runCompleteTestWithAFormat
-
- /** A test */
- public void testUnpackMarkup() throws Exception{
- // create the markupElementsMap map
- //Map markupElementsMap = null;
- gate.Document doc = null;
- /*
- markupElementsMap = new HashMap();
- // populate it
- markupElementsMap.put ("S","Sentence");
- markupElementsMap.put ("s","Sentence");
- */
- // Create the element2String map
- Map<String,String> anElement2StringMap = new HashMap<String,String>();
-
- // Populate it
- anElement2StringMap.put("S","\n");
- anElement2StringMap.put("s","\n");
-
- doc = gate.Factory.newDocument(new
URL(TestDocument.getTestServerName()+"tests/xml/xces.xml"), workingEncoding);
-
- AnnotationSet annotSet = doc.getAnnotations(
- GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
- assertEquals("For "+doc.getSourceUrl()+" the number of annotations"+
- " should be:758",758,annotSet.size());
-
- gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
-
- // Verifies if the maximum annotation ID on the GATE doc is less than the
- // Annotation ID generator of the document.
- verifyAnnotationIDGenerator(doc);
-
- } // testUnpackMarkup()
-
- /*
- * This method runs ANNIE with defaults on a document, then saves
- * it as a GATE XML document and loads it back. All the annotations on the
- * loaded document should be the same as the original ones.
- *
- * It also verifies if the matches feature still holds after an
export/import to XML
- */
- public void testAnnotationConsistencyForSaveAsXml()throws Exception{
- // Load a document from the test repository
- //Document origDoc =
gate.Factory.newDocument(Gate.getUrl("tests/xml/gateTestSaveAsXML.xml"));
- String testDoc =
gate.util.Files.getGateResourceAsString("gate.ac.uk/tests/xml/gateTestSaveAsXML.xml");
- Document origDoc = gate.Factory.newDocument(testDoc);
-
- // Verifies if the maximum annotation ID on the origDoc is less than the
- // Annotation ID generator of the document.
- verifyAnnotationIDGenerator(origDoc);
-
- // Load ANNIE with defaults and run it on the document
- ConditionalSerialAnalyserController annie =
(ConditionalSerialAnalyserController)
- PersistenceManager.loadObjectFromFile(new File(new File(
- Gate.getPluginsHome(), ANNIEConstants.PLUGIN_DIR),
- ANNIEConstants.DEFAULT_FILE));
- assertTrue("ANNIE not loaded!", annie != null);
- Corpus c = Factory.newCorpus("test");
- c.add(origDoc);
- annie.setCorpus(c);
- annie.execute();
-
- // SaveAS XML and reload the document into another GATE doc
- // Export the Gate document called origDoc as XML, into a temp file,
- // using the working encoding
- File xmlFile = Files.writeTempFile(origDoc.toXml(),workingEncoding);
- System.out.println("Saved to temp file :" + xmlFile.toURI().toURL());
-
- Document reloadedDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(),
workingEncoding);
- // Verifies if the maximum annotation ID on the origDoc is less than the
- // Annotation ID generator of the document.
- verifyAnnotationIDGenerator(reloadedDoc);
-
- // Verify if the annotations are identical in the two docs.
- Map<Integer,Annotation> origAnnotMap = buildID2AnnotMap(origDoc);
- Map<Integer,Annotation> reloadedAnnMap = buildID2AnnotMap(reloadedDoc);
-
- //Verifies if the reloaded annotations are the same as the original ones
- verifyIDConsistency(origAnnotMap, reloadedAnnMap);
-
- // Build the original Matches map
- // ID -> List of IDs
- Map<Integer,List<Integer>> origMatchesMap = buildMatchesMap(origDoc);
- // Verify the consistency of matches
- // Compare every orig annotation pointed by the MatchesMap with the
reloadedAnnot
- // extracted from the reloadedMAp
- for(Iterator<Integer> it = origMatchesMap.keySet().iterator();
it.hasNext();){
- Integer id = it.next();
- Annotation origAnnot = origAnnotMap.get(id);
- assertTrue("Couldn't find an original annot with ID=" + id, origAnnot !=
null);
- Annotation reloadedAnnot = reloadedAnnMap.get(id);
- assertTrue("Couldn't find a reloaded annot with ID=" + id, reloadedAnnot
!= null);
- compareAnnot(origAnnot,reloadedAnnot);
- // Iterate through the matches list and repeat the comparison
- List<Integer> matchesList = origMatchesMap.get(id);
- for (Iterator<Integer> itList = matchesList.iterator();
itList.hasNext();){
- Integer matchId = itList.next();
- Annotation origA = origAnnotMap.get(matchId);
- assertTrue("Couldn't find an original annot with ID=" + matchId, origA
!= null);
- Annotation reloadedA = reloadedAnnMap.get(matchId);
- assertTrue("Couldn't find a reloaded annot with ID=" + matchId,
reloadedA != null);
- compareAnnot(origA, reloadedA);
- }// End for
- }// End for
- // Clean up the XMl file
- xmlFile.delete();
- }// End testAnnotationIDConsistencyForSaveAsXml
-
- /**
- * Builds a Map based on the matches feature of some annotations. The goal
is to
- * use this map to validate the annotations from the reloaded document.
- * In case no Annot has the matches feat, will return an Empty MAP
- * @param doc The document of which annotations will be used to construct
the map
- * @return A Map from Annot ID -> Lists of Annot IDs
- */
- private Map<Integer,List<Integer>> buildMatchesMap(Document doc){
- Map<Integer,List<Integer>> matchesMap = new
HashMap<Integer,List<Integer>>();
- // Scan the default annotation set
- AnnotationSet annotSet = doc.getAnnotations();
-
- helperBuildMatchesMap(annotSet, matchesMap);
- // Scan all named annotation sets
- if (doc.getNamedAnnotationSets() != null){
- for ( Iterator<AnnotationSet> namedAnnotSetsIter =
doc.getNamedAnnotationSets().values().iterator();
-
namedAnnotSetsIter.hasNext(); ){
- helperBuildMatchesMap(namedAnnotSetsIter.next(), matchesMap);
- }// End while
- }// End if
- return matchesMap;
- }// End of buildMatchesMap()
-
- /**
- * This is a helper metod. It scans an annotation set and adds the ID of the
annotations
- * which have the matches feature to the map.
- * @param sourceAnnotSet The annotation set investigated
- * @param aMap
- */
- private void helperBuildMatchesMap(AnnotationSet sourceAnnotSet,
Map<Integer,List<Integer>> aMap ){
-
- for (Iterator<Annotation> it = sourceAnnotSet.iterator(); it.hasNext();){
- Annotation a = it.next();
- FeatureMap aFeatMap = a.getFeatures();
- // Skip those annotations who don't have features
- if (aFeatMap == null) continue;
- // Extract the matches feat
- @SuppressWarnings("unchecked")
- List<Integer> matchesVal = (List<Integer>) aFeatMap.get("matches");
- if (matchesVal == null) continue;
- Integer id = a.getId();
- aMap.put(id,matchesVal);
- }//End for
-
- }// End of helperBuildMatchesMap()
-
- /**
- * This method tests if the generator for new Annotation IDs is greather
than the
- * maximum Annotation ID present in the GATE document. In oter words, it
ensures that
- * new Annotations will receive an UNIQUE ID.
- *
- * @param aDoc The GATE document being tested
- */
- protected void verifyAnnotationIDGenerator(gate.Document aDoc){
- // Creates a MAP containing all the annotations of the document.
- // In doing so, it also tests if there are annotations with the same ID.
- Map<Integer,Annotation> id2AnnotationMap = buildID2AnnotMap(aDoc);
-
- if (id2AnnotationMap == null || id2AnnotationMap.isEmpty()){
- //System.out.println("No annotations found on the document! Nothing to
test.");
- return;
- }
-
- // Get the key set of the Map and sort them
- Set<Integer> keysSet = id2AnnotationMap.keySet();
- TreeSet<Integer> sortedSet = new TreeSet<Integer>(keysSet);
- // Get the highest Annotation ID
- Integer maxAnnotId = sortedSet.last();
- // Compare its value to the one hold by the document's ID generator
- Integer generatorId = ((DocumentImpl)aDoc).getNextAnnotationId();
-
-// System.out.println("maxAnnotid = " + maxAnnotId + " generatorID = " +
generatorId);
-
- assertTrue("Annotation ID generator["+generatorId+"] on document [" +
aDoc.getSourceUrl() +
- "] was equal or less than the MAX Annotation ID["+maxAnnotId+"] on
the document."+
- " This may lead to Annotation ID conflicts.",
generatorId.intValue() > maxAnnotId.intValue());
-
-
- }// End of verifyAnnotationIDGenerator()
-
- /**
- * Verifies if the two maps hold annotations with the same ID. The only
thing not checked
- * are the features, as some of them could be lost in the
serialization/deserialization process
- * @param origAnnotMap A map by ID, containing the original annotations
- * @param reloadedAnnMap A map by ID, containing the recreated annotations
- */
- private void verifyIDConsistency(Map<Integer,Annotation> origAnnotMap,
Map<Integer,Annotation> reloadedAnnMap) {
- assertEquals("Found a different number of annot in both documents.",
- origAnnotMap.keySet().size(), reloadedAnnMap.keySet().size());
-
-// List orig = new ArrayList(origAnnotMap.keySet());
-// Collections.sort(orig);
-// System.out.println("ORIG SET =" + orig);
-//
-// List rel = new ArrayList(reloadedAnnMap.keySet());
-// Collections.sort(rel);
-// System.out.println("REL SET =" + rel);
-//
-
- for (Iterator<Integer> it = origAnnotMap.keySet().iterator();
it.hasNext();){
- Integer id = it.next();
- Annotation origAnn = origAnnotMap.get(id);
- Annotation reloadedAnnot = reloadedAnnMap.get(id);
-
- assertTrue("Annotation with ID="+ id +" was not found in the reloaded
document.", reloadedAnnot != null);
- compareAnnot(origAnn, reloadedAnnot);
-
- }// End for
- }// End of verifyIDConsistency()
-
- /**
- * Thes if two annotatiosn are the same, except their features.
- * @param origAnn
- * @param reloadedAnnot
- */
- private void compareAnnot(Annotation origAnn, Annotation reloadedAnnot) {
- assertTrue("Found original and reloaded annot without the same ID!",
- origAnn.getId().equals(reloadedAnnot.getId()));
- assertTrue("Found original and reloaded annot without the same TYPE!\n"+
- "Original was ["+origAnn.getType()+"] and reloaded was
["+reloadedAnnot.getType()+"].",
- origAnn.getType().equals(reloadedAnnot.getType()));
- assertTrue("Found original and reloaded annot without the same START
offset!",
-
origAnn.getStartNode().getOffset().equals(reloadedAnnot.getStartNode().getOffset()));
- assertTrue("Found original and reloaded annot without the same END
offset!",
-
origAnn.getEndNode().getOffset().equals(reloadedAnnot.getEndNode().getOffset()));
- }// End of compareAnnot()
-
-
- private Map<Integer,Annotation> addAnnotSet2Map(AnnotationSet annotSet,
Map<Integer,Annotation> id2AnnMap){
- for (Iterator<Annotation> it = annotSet.iterator(); it.hasNext();){
- Annotation a = it.next();
- Integer id = a.getId();
-
- assertTrue("Found two annotations(one with type = " + a.getType() +
- ")with the same ID=" + id, !id2AnnMap.keySet().contains(id));
-
- id2AnnMap.put(id, a);
- }// End for
- return id2AnnMap;
- }
-
- /**
- * Scans a target Doc for all Annotations and builds a map (from anot ID to
annot) in the process
- * I also checks to see if there are two annotations with the same ID.
- * @param aDoc The GATE doc to be scaned
- * @return a Map ID2Annot
- */
- private Map<Integer,Annotation> buildID2AnnotMap(Document aDoc){
- Map<Integer,Annotation> id2AnnMap = new HashMap<Integer,Annotation>();
- // Scan the default annotation set
- AnnotationSet annotSet = aDoc.getAnnotations();
- addAnnotSet2Map(annotSet, id2AnnMap);
- // Scan all named annotation sets
- if (aDoc.getNamedAnnotationSets() != null){
- for ( Iterator<AnnotationSet> namedAnnotSetsIter =
aDoc.getNamedAnnotationSets().values().iterator();
-
namedAnnotSetsIter.hasNext(); ){
-
- addAnnotSet2Map(namedAnnotSetsIter.next(), id2AnnMap);
- }// End while
- }// End if
- return id2AnnMap;
- }// End of buildID2AnnotMap()
-
- /** Test suite routine for the test runner */
- public static Test suite() {
- return new TestSuite(TestXml.class);
- } // suite
-
-} // class TestXml
Copied: gate/branches/sawdust2/gate-core/src/test/java/gate/xml/TestXml.java
(from rev 19634,
gate/branches/sawdust2/gate-core/src/test/disabled/gate/xml/TestXml.java)
===================================================================
--- gate/branches/sawdust2/gate-core/src/test/java/gate/xml/TestXml.java
(rev 0)
+++ gate/branches/sawdust2/gate-core/src/test/java/gate/xml/TestXml.java
2016-10-05 09:52:33 UTC (rev 19635)
@@ -0,0 +1,458 @@
+/*
+ * TestXml.java
+ *
+ * Copyright (c) 1995-2012, The University of Sheffield. See the file
+ * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ * This file is part of GATE (see http://gate.ac.uk/), and is free
+ * software, licenced under the GNU Library General Public License,
+ * Version 2, June 1991 (in the distribution as file licence.html,
+ * and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ * Cristian URSU, 8/May/2000
+ *
+ * $Id$
+ */
+
+package gate.xml;
+
+import java.io.File;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.Document;
+import gate.DocumentFormat;
+import gate.Factory;
+import gate.FeatureMap;
+import gate.Gate;
+import gate.GateConstants;
+import gate.corpora.DocumentImpl;
+import gate.corpora.TestDocument;
+import gate.util.Files;
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+//import org.w3c.www.mime.*;
+
+
+/** Test class for XML facilities
+ *
+ */
+public class TestXml extends TestCase
+{
+ /** The encoding used in our tests*/
+ private static String workingEncoding="UTF-8";
+
+ /** Construction */
+ public TestXml(String name) { super(name); }
+
+  /** Fixture set up: initialises GATE in sandbox mode unless that has already happened. */
+  @Override
+  public void setUp() throws Exception {
+    // GATE may already have been initialised earlier in this JVM.
+    if (Gate.isInitialised()) return;
+    Gate.runInSandbox(true);
+    Gate.init();
+  } // setUp
+
+  /**
+   * Round-trips documents of several source formats through GATE XML.
+   *
+   * <p>Every resource listed below is resolved against
+   * {@link TestDocument#getTestServerName()} and passed, together with a
+   * description of its format, to
+   * {@link #runCompleteTestWithAFormat(URL, String)}, which performs the
+   * export, the reload and the checks.
+   *
+   * @throws Exception if a URL cannot be built, or whatever
+   *         runCompleteTestWithAFormat throws for one of the documents
+   */
+  public void testGateDocumentToAndFromXmlWithDifferentKindOfFormats()
+      throws Exception{
+    // {resource path, format description}: one row per document, so that a
+    // path and its description cannot get out of step.
+    String[][] testDocuments = {
+      {"tests/xml/xces.xml", " an XML document "},
+      {"tests/xml/Sentence.xml", " an XML document "},
+      {"tests/html/test1.htm", " an HTML document "},
+      {"tests/email/test2.eml", " an EMAIL document "}
+    };
+
+    // Build all URLs first so that a malformed one fails the test before any
+    // round trip has started. The URL constructor either returns an object
+    // or throws MalformedURLException, so no null checks are needed.
+    URL[] urls = new URL[testDocuments.length];
+    for (int i = 0; i < testDocuments.length; i++) {
+      urls[i] = new URL(TestDocument.getTestServerName() + testDocuments[i][0]);
+    }
+
+    for (int i = 0; i < testDocuments.length; i++) {
+      runCompleteTestWithAFormat(urls[i], testDocuments[i][1]);
+    }
+  }// testGateDocumentToAndFromXmlWithDifferentKindOfFormats
+
+  /**
+   * Loads the document at {@code url}, exports it as GATE XML to a temporary
+   * file, reloads that file and checks that the reloaded document has the
+   * same content size and default annotation set size as the original.
+   * @param url location of the document to round-trip
+   * @param urlDescription format description used in failure messages
+   * @throws Exception if loading, exporting or reloading throws
+   */
+  private void runCompleteTestWithAFormat(URL url, String urlDescription)
+      throws Exception{
+    // Load the key document; its markup is unpacked explicitly further down
+    FeatureMap params = Factory.newFeatureMap();
+    params.put(Document.DOCUMENT_URL_PARAMETER_NAME, url);
+    params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
+    gate.Document keyDocument = (Document)Factory.createResource(
+        "gate.corpora.DocumentImpl", params);
+
+    assertTrue("Coudn't create a GATE document instance for " +
+        url.toString() +
+        " Can't continue." , keyDocument != null);
+
+    gate.DocumentFormat keyDocFormat = gate.DocumentFormat.getDocumentFormat(
+        keyDocument, keyDocument.getSourceUrl());
+
+    assertTrue("Fail to recognize " +
+        url.toString() +
+        " as being " + urlDescription + " !", keyDocFormat != null);
+
+    // Unpack the markup
+    keyDocFormat.unpackMarkup(keyDocument);
+    // Verify that all annotations from the default annotation set are consistent
+    gate.corpora.TestDocument.verifyNodeIdConsistency(keyDocument);
+
+    // Verifies if the maximum annotation ID on the GATE doc is less than the
+    // Annotation ID generator of the document.
+    verifyAnnotationIDGenerator(keyDocument);
+
+    // Save the size of the document and the number of annotations
+    long keyDocumentSize = keyDocument.getContent().size().longValue();
+    int keyDocumentAnnotationSetSize = keyDocument.getAnnotations().size();
+
+    // Export the Gate document called keyDocument as XML, into a temp file,
+    // using the working encoding
+    File xmlFile = Files.writeTempFile(keyDocument.toXml(), workingEncoding );
+    assertTrue("The temp GATE XML file is null. Can't continue.",xmlFile != null);
+
+    // Load the XML Gate document from the tmp file into memory
+    gate.Document gateDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(), workingEncoding);
+
+    assertTrue("Coudn't create a GATE document instance for " +
+        xmlFile.toURI().toURL().toString() +
+        " Can't continue." , gateDoc != null);
+
+    gate.DocumentFormat gateDocFormat =
+        DocumentFormat.getDocumentFormat(gateDoc,gateDoc.getSourceUrl());
+
+    assertTrue("Fail to recognize " +
+        xmlFile.toURI().toURL().toString() +
+        " as being a GATE XML document !", gateDocFormat != null);
+
+    gateDocFormat.unpackMarkup(gateDoc);
+    // Verify that all annotations from the default annotation set are consistent
+    gate.corpora.TestDocument.verifyNodeIdConsistency(gateDoc);
+
+    // Save the size of the reloaded document and the number of its annotations
+    long gateDocSize = gateDoc.getContent().size().longValue();
+    int gateDocAnnotationSetSize = gateDoc.getAnnotations().size();
+
+    assertTrue("Exporting as GATE XML resulted in document content size lost." +
+        " Something went wrong.", keyDocumentSize == gateDocSize);
+
+    assertTrue("Exporting as GATE XML resulted in annotation lost." +
+        " No. of annotations missing = " +
+        Math.abs(keyDocumentAnnotationSetSize - gateDocAnnotationSetSize),
+        keyDocumentAnnotationSetSize == gateDocAnnotationSetSize);
+
+    // Verifies if the maximum annotation ID on the GATE doc is less than the
+    // Annotation ID generator of the document.
+    verifyAnnotationIDGenerator(gateDoc);
+
+    //Don't need tmp Gate XML file.
+    xmlFile.delete();
+  }//runCompleteTestWithAFormat
+
+  /**
+   * Loads {@code tests/xml/xces.xml} from the test server and checks the
+   * document that comes back: the number of annotations in the original
+   * markups annotation set, the node ID consistency check of
+   * {@link TestDocument} and the state of the annotation ID generator.
+   *
+   * <p>The expected count of 758 annotations is tied to the content of that
+   * test resource.
+   *
+   * @throws Exception if the URL cannot be built or the document not loaded
+   */
+  public void testUnpackMarkup() throws Exception{
+    URL xcesUrl = new URL(TestDocument.getTestServerName()+"tests/xml/xces.xml");
+
+    // No explicit unpackMarkup call is made in this test; presumably
+    // Factory.newDocument unpacks the markup while creating the document,
+    // which is what the assertions below rely on.
+    gate.Document doc = gate.Factory.newDocument(xcesUrl, workingEncoding);
+
+    // Look the annotations up in the set named by
+    // GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME.
+    AnnotationSet annotSet = doc.getAnnotations(
+        GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
+    assertEquals("For "+doc.getSourceUrl()+" the number of annotations"+
+        " should be:758",758,annotSet.size());
+
+    gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
+
+    // Verifies if the maximum annotation ID on the GATE doc is less than the
+    // Annotation ID generator of the document.
+    verifyAnnotationIDGenerator(doc);
+  } // testUnpackMarkup()
+
+  /**
+   * Adds two annotations that point at each other through their "matches"
+   * feature to a test document, saves the document as GATE XML and loads it
+   * back. All the annotations on the loaded document should be the same as
+   * the original ones, including those referenced by a matches feature.
+   * @throws Exception if the document cannot be loaded, saved or reloaded
+   */
+  public void testAnnotationConsistencyForSaveAsXml()throws Exception{
+    // Load a document from the test repository
+    String testDoc = gate.util.Files.getGateResourceAsString("gate.ac.uk/tests/xml/gateTestSaveAsXML.xml");
+    Document origDoc = gate.Factory.newDocument(testDoc);
+
+    // Verifies if the maximum annotation ID on the origDoc is less than the
+    // Annotation ID generator of the document.
+    verifyAnnotationIDGenerator(origDoc);
+
+    //create a couple of annotations with features we can look at after a round trip to disc
+    Integer ann1ID = origDoc.getAnnotations().add(0L,10L,"Test",Factory.newFeatureMap());
+    Integer ann2ID = origDoc.getAnnotations().add(15L,20L,"Test",Factory.newFeatureMap());
+    origDoc.getAnnotations().get(ann1ID).getFeatures().put("matches", Arrays.asList(ann2ID));
+    origDoc.getAnnotations().get(ann2ID).getFeatures().put("matches", Arrays.asList(ann1ID));
+
+    // SaveAS XML and reload the document into another GATE doc
+    // Export the Gate document called origDoc as XML, into a temp file,
+    // using the working encoding
+    File xmlFile = Files.writeTempFile(origDoc.toXml(),workingEncoding);
+    try {
+      System.out.println("Saved to temp file :" + xmlFile.toURI().toURL());
+
+      Document reloadedDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(), workingEncoding);
+      // Verifies if the maximum annotation ID on the reloadedDoc is less than
+      // the Annotation ID generator of the document.
+      verifyAnnotationIDGenerator(reloadedDoc);
+
+      // Verify if the annotations are identical in the two docs.
+      Map<Integer,Annotation> origAnnotMap = buildID2AnnotMap(origDoc);
+      Map<Integer,Annotation> reloadedAnnMap = buildID2AnnotMap(reloadedDoc);
+
+      //Verifies if the reloaded annotations are the same as the original ones
+      verifyIDConsistency(origAnnotMap, reloadedAnnMap);
+
+      // Build the original Matches map
+      // ID -> List of IDs
+      Map<Integer,List<Integer>> origMatchesMap = buildMatchesMap(origDoc);
+      // Verify the consistency of matches
+      // Compare every orig annotation pointed by the MatchesMap with the reloadedAnnot
+      // extracted from the reloadedMap
+      for (Map.Entry<Integer,List<Integer>> entry : origMatchesMap.entrySet()) {
+        Integer id = entry.getKey();
+        Annotation origAnnot = origAnnotMap.get(id);
+        assertTrue("Couldn't find an original annot with ID=" + id, origAnnot != null);
+        Annotation reloadedAnnot = reloadedAnnMap.get(id);
+        assertTrue("Couldn't find a reloaded annot with ID=" + id, reloadedAnnot != null);
+        compareAnnot(origAnnot,reloadedAnnot);
+        // Iterate through the matches list and repeat the comparison
+        for (Integer matchId : entry.getValue()) {
+          Annotation origA = origAnnotMap.get(matchId);
+          assertTrue("Couldn't find an original annot with ID=" + matchId, origA != null);
+          Annotation reloadedA = reloadedAnnMap.get(matchId);
+          assertTrue("Couldn't find a reloaded annot with ID=" + matchId, reloadedA != null);
+          compareAnnot(origA, reloadedA);
+        }// End for
+      }// End for
+    } finally {
+      // Clean up the XML file, also when one of the checks above has failed
+      xmlFile.delete();
+    }
+  }// End testAnnotationConsistencyForSaveAsXml
+
+  /**
+   * Collects the "matches" feature of every annotation of a document. The
+   * goal is to use the result to validate the annotations of the reloaded
+   * document. When no annotation has the feature the returned map is empty.
+   * @param doc the document whose default and named annotation sets are scanned
+   * @return a map from annotation ID to the list of IDs in its matches feature
+   */
+  private Map<Integer,List<Integer>> buildMatchesMap(Document doc){
+    Map<Integer,List<Integer>> result = new HashMap<Integer,List<Integer>>();
+
+    // The default annotation set comes first ...
+    helperBuildMatchesMap(doc.getAnnotations(), result);
+
+    // ... followed by all the named annotation sets, if there are any
+    if (doc.getNamedAnnotationSets() != null){
+      for (AnnotationSet namedSet : doc.getNamedAnnotationSets().values()){
+        helperBuildMatchesMap(namedSet, result);
+      }
+    }
+
+    return result;
+  }// End of buildMatchesMap()
+
+  /**
+   * Helper for buildMatchesMap. It scans one annotation set and, for each
+   * annotation that has a "matches" feature, stores the value of that
+   * feature in the map under the ID of the annotation.
+   * @param sourceAnnotSet the annotation set to scan
+   * @param aMap receives one entry (annotation ID to matches value) per hit
+   */
+  private void helperBuildMatchesMap(AnnotationSet sourceAnnotSet, Map<Integer,List<Integer>> aMap ){
+    for (Annotation annot : sourceAnnotSet){
+      FeatureMap features = annot.getFeatures();
+      if (features != null){
+        // The cast is unchecked: the value is taken to be a list of IDs
+        @SuppressWarnings("unchecked")
+        List<Integer> matches = (List<Integer>) features.get("matches");
+
+        // Annotations without the feature are left out of the map
+        if (matches != null){
+          aMap.put(annot.getId(), matches);
+        }
+      }
+    }//End for
+  }// End of helperBuildMatchesMap()
+
+  /**
+   * Tests that the generator for new annotation IDs is greater than the
+   * maximum annotation ID present in the GATE document. In other words, it
+   * ensures that new annotations will receive a unique ID.
+   * A document without annotations is accepted without further checks.
+   *
+   * @param aDoc the GATE document being tested; it is cast to DocumentImpl
+   *          in order to read the next annotation ID
+   */
+  protected void verifyAnnotationIDGenerator(gate.Document aDoc){
+    // Creates a map containing all the annotations of the document.
+    // In doing so, it also tests if there are annotations with the same ID.
+    Map<Integer,Annotation> id2AnnotationMap = buildID2AnnotMap(aDoc);
+
+    // No annotations found on the document: nothing to test.
+    if (id2AnnotationMap == null || id2AnnotationMap.isEmpty()){
+      return;
+    }
+
+    // Sorting the IDs puts the highest annotation ID last
+    TreeSet<Integer> sortedIds = new TreeSet<Integer>(id2AnnotationMap.keySet());
+    Integer maxAnnotId = sortedIds.last();
+    // Compare its value to the one held by the document's ID generator
+    Integer generatorId = ((DocumentImpl)aDoc).getNextAnnotationId();
+
+    assertTrue("Annotation ID generator["+generatorId+"] on document [" + aDoc.getSourceUrl() +
+            "] was equal or less than the MAX Annotation ID["+maxAnnotId+"] on the document."+
+            " This may lead to Annotation ID conflicts.",
+            generatorId.intValue() > maxAnnotId.intValue());
+  }// End of verifyAnnotationIDGenerator()
+
+  /**
+   * Verifies that the two maps hold annotations with the same IDs: both maps
+   * must have the same number of entries, every original ID must be present
+   * in the reloaded map, and each pair must pass compareAnnot. The only thing
+   * not checked are the features, as some of them could be lost in the
+   * serialization/deserialization process.
+   *
+   * @param origAnnotMap a map by ID, containing the original annotations
+   * @param reloadedAnnMap a map by ID, containing the recreated annotations
+   */
+  private void verifyIDConsistency(Map<Integer,Annotation> origAnnotMap, Map<Integer,Annotation> reloadedAnnMap) {
+    assertEquals("Found a different number of annot in both documents.",
+            origAnnotMap.size(), reloadedAnnMap.size());
+
+    // Walk the entries directly instead of looking every key up again
+    for (Map.Entry<Integer,Annotation> origEntry : origAnnotMap.entrySet()){
+      Integer id = origEntry.getKey();
+      Annotation reloadedAnnot = reloadedAnnMap.get(id);
+
+      assertTrue("Annotation with ID="+ id +" was not found in the reloaded document.", reloadedAnnot != null);
+      compareAnnot(origEntry.getValue(), reloadedAnnot);
+    }// End for
+  }// End of verifyIDConsistency()
+
+  /**
+   * Tests if two annotations are the same, except for their features: they
+   * must have equal ID, type, start offset and end offset.
+   *
+   * @param origAnn the annotation taken from the original document
+   * @param reloadedAnnot the annotation taken from the reloaded document
+   */
+  private void compareAnnot(Annotation origAnn, Annotation reloadedAnnot) {
+    assertTrue("Found original and reloaded annot without the same ID!",
+            origAnn.getId().equals(reloadedAnnot.getId()));
+    assertTrue("Found original and reloaded annot without the same TYPE!\n"+
+            "Original was ["+origAnn.getType()+"] and reloaded was ["+reloadedAnnot.getType()+"].",
+            origAnn.getType().equals(reloadedAnnot.getType()));
+    assertTrue("Found original and reloaded annot without the same START offset!",
+            origAnn.getStartNode().getOffset().equals(reloadedAnnot.getStartNode().getOffset()));
+    assertTrue("Found original and reloaded annot without the same END offset!",
+            origAnn.getEndNode().getOffset().equals(reloadedAnnot.getEndNode().getOffset()));
+  }// End of compareAnnot()
+
+
+  /**
+   * Adds every annotation of an annotation set to a map keyed by annotation
+   * ID, asserting for each one that its ID is not already in the map.
+   *
+   * @param annotSet the annotations to add
+   * @param id2AnnMap the map to fill
+   * @return the same map instance that was passed in as id2AnnMap
+   */
+  private Map<Integer,Annotation> addAnnotSet2Map(AnnotationSet annotSet, Map<Integer,Annotation> id2AnnMap){
+    for (Annotation annot : annotSet){
+      Integer annotId = annot.getId();
+      boolean idIsNew = !id2AnnMap.containsKey(annotId);
+
+      assertTrue("Found two annotations(one with type = " + annot.getType() +
+              ")with the same ID=" + annotId, idIsNew);
+
+      id2AnnMap.put(annotId, annot);
+    }// End for
+    return id2AnnMap;
+  }
+
+  /**
+   * Scans a target document for all annotations and builds a map (from
+   * annotation ID to annotation) in the process. The default annotation set
+   * and every named annotation set are included. While adding, it also
+   * checks whether two annotations share the same ID.
+   *
+   * @param aDoc the GATE document to be scanned
+   * @return a map from annotation ID to annotation
+   */
+  private Map<Integer,Annotation> buildID2AnnotMap(Document aDoc){
+    Map<Integer,Annotation> byId = new HashMap<Integer,Annotation>();
+    // Default annotation set
+    addAnnotSet2Map(aDoc.getAnnotations(), byId);
+    // Named annotation sets, when the document exposes any
+    if (aDoc.getNamedAnnotationSets() == null){
+      return byId;
+    }
+    for (AnnotationSet namedSet : aDoc.getNamedAnnotationSets().values()){
+      addAnnotSet2Map(namedSet, byId);
+    }// End for
+    return byId;
+  }// End of buildID2AnnotMap()
+
+ /** Test suite routine for the test runner */
+ public static Test suite() {
+ return new TestSuite(TestXml.class);
+ } // suite
+
+} // class TestXml
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs