Revision: 18262
          http://sourceforge.net/p/gate/code/18262
Author:   markagreenwood
Date:     2014-08-20 17:55:21 +0000 (Wed, 20 Aug 2014)
Log Message:
-----------
the inline XML export format

Modified Paths:
--------------
    gate/trunk/src/main/gate/corpora/export/GateXMLExporter.java
    gate/trunk/src/main/gate/resources/creole/creole.xml

Added Paths:
-----------
    gate/trunk/src/main/gate/corpora/export/InlineXMLExporter.java

Modified: gate/trunk/src/main/gate/corpora/export/GateXMLExporter.java
===================================================================
--- gate/trunk/src/main/gate/corpora/export/GateXMLExporter.java        2014-08-20 10:31:54 UTC (rev 18261)
+++ gate/trunk/src/main/gate/corpora/export/GateXMLExporter.java        2014-08-20 17:55:21 UTC (rev 18262)
@@ -13,20 +13,19 @@
 
 package gate.corpora.export;
 
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-
-import javax.xml.stream.XMLStreamException;
-
 import gate.Document;
 import gate.DocumentExporter;
 import gate.FeatureMap;
 import gate.corpora.DocumentStaxUtils;
-import gate.corpora.MimeType;
 import gate.creole.metadata.AutoInstance;
 import gate.creole.metadata.CreoleResource;
 
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import javax.xml.stream.XMLStreamException;
+
 @CreoleResource(name = "GATE XML Exporter", tool = true, autoinstances = @AutoInstance, icon = "GATEXML")
 public class GateXMLExporter extends DocumentExporter {
 

Added: gate/trunk/src/main/gate/corpora/export/InlineXMLExporter.java
===================================================================
--- gate/trunk/src/main/gate/corpora/export/InlineXMLExporter.java                              (rev 0)
+++ gate/trunk/src/main/gate/corpora/export/InlineXMLExporter.java      2014-08-20 17:55:21 UTC (rev 18262)
@@ -0,0 +1,174 @@
+/*
+ *  Copyright (c) 1995-2014, The University of Sheffield. See the file
+ *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ *  This file is part of GATE (see http://gate.ac.uk/), and is free
+ *  software, licenced under the GNU Library General Public License,
+ *  Version 2, June 1991 (in the distribution as file licence.html,
+ *  and also available at http://gate.ac.uk/gate/licence.html).
+ *
+ *  Mark A. Greenwood 14/08/2014
+ *
+ */
+package gate.corpora.export;
+
+import gate.AnnotationSet;
+import gate.Document;
+import gate.DocumentExporter;
+import gate.Factory;
+import gate.FeatureMap;
+import gate.GateConstants;
+import gate.annotation.AnnotationSetImpl;
+import gate.creole.metadata.AutoInstance;
+import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.Optional;
+import gate.creole.metadata.RunTime;
+import gate.util.InvalidOffsetException;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+@CreoleResource(name = "Inline XML Exporter", tool = true, autoinstances = @AutoInstance)
+public class InlineXMLExporter extends DocumentExporter {
+
+  private static final long serialVersionUID = -9072204691197080958L;
+
+  private String annotationSetName, rootElement, encoding;
+
+  private List<String> annotationTypes;
+
+  private Boolean includeFeatures, includeOriginalMarkups;
+
+  public InlineXMLExporter() {
+    super("Inline XML", "xml", "text/xml");
+  }
+
+  public String getAnnotationSetName() {
+    return annotationSetName;
+  }
+
+  public String getEncoding() {
+    return encoding;
+  }
+
+  @RunTime
+  @CreoleParameter(defaultValue = "UTF-8")
+  public void setEncoding(String encoding) {
+    this.encoding = encoding;
+  }
+
+  public String getRootElement() {
+    return rootElement;
+  }
+
+  @RunTime
+  @Optional
+  @CreoleParameter()
+  public void setRootElement(String rootElement) {
+    this.rootElement = rootElement;
+  }
+
+  @RunTime
+  @Optional
+  @CreoleParameter
+  public void setAnnotationSetName(String annotationSetName) {
+    this.annotationSetName = annotationSetName;
+  }
+
+  public List<String> getAnnotationTypes() {
+    return annotationTypes;
+  }
+
+  @RunTime
+  @CreoleParameter(defaultValue = "Person;Location;Organization")
+  public void setAnnotationTypes(List<String> annotationTypes) {
+    this.annotationTypes = annotationTypes;
+  }
+
+  public Boolean getIncludeOriginalMarkups() {
+    return includeOriginalMarkups;
+  }
+
+  @RunTime
+  @CreoleParameter(defaultValue = "false")
+  public void setIncludeOriginalMarkups(Boolean includeOriginalMarkups) {
+    this.includeOriginalMarkups = includeOriginalMarkups;
+  }
+
+  public Boolean getIncludeFeatures() {
+    return includeFeatures;
+  }
+
+  @RunTime
+  @CreoleParameter(defaultValue = "true")
+  public void setIncludeFeatures(Boolean includeFeatures) {
+    this.includeFeatures = includeFeatures;
+  }
+
+  @Override
+  public void export(Document doc, OutputStream out, FeatureMap options)
+          throws IOException {
+
+    Integer rootID = null;
+    AnnotationSet withRoot = null;
+
+    AnnotationSet originalMarkups = null;
+    AnnotationSet backupOriginalMarkups = null;
+
+    try {
+      AnnotationSet allAnnots =
+              doc.getAnnotations((String)options.get("annotationSetName"));
+
+      if(!(Boolean)options.get("includeOriginalMarkups")) {
+        originalMarkups =
+                doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
+        backupOriginalMarkups = new AnnotationSetImpl(originalMarkups);
+        originalMarkups.clear();
+      }
+
+      // first transfer the annotation types from a list to a set
+      @SuppressWarnings("unchecked")
+      Set<String> types2Export =
+              new HashSet<String>((List<String>)options.get("annotationTypes"));
+
+      // then get the annotations for export
+      AnnotationSet annots2Export = allAnnots.get(types2Export);
+      withRoot = new AnnotationSetImpl(doc);
+      withRoot.addAll(annots2Export);
+
+      String rootType = (String)options.get("rootElement");
+      if(rootType != null && !"".equals(rootType)) {
+
+        // add the root element to the set
+        rootID =
+                withRoot.add(0L, doc.getContent().size(),
+                        (String)options.get("rootElement"),
+                        Factory.newFeatureMap());
+      }
+
+      // create a writer using the specified encoding
+      OutputStreamWriter writer =
+              new OutputStreamWriter(out, (String)options.get("encoding"));
+
+      // write the document
+      writer.write(doc.toXml(withRoot, (Boolean)options.get("includeFeatures")));
+
+      // make sure it gets written
+      writer.flush();
+    } catch(InvalidOffsetException e) {
+      throw new IOException(e);
+    } finally {
+      // delete the fake root element
+      if(rootID != null) withRoot.remove(withRoot.get(rootID));
+
+      // restore the original markups
+      if(backupOriginalMarkups != null)
+        originalMarkups.addAll(backupOriginalMarkups);
+    }
+  }
+}

Modified: gate/trunk/src/main/gate/resources/creole/creole.xml
===================================================================
--- gate/trunk/src/main/gate/resources/creole/creole.xml        2014-08-20 10:31:54 UTC (rev 18261)
+++ gate/trunk/src/main/gate/resources/creole/creole.xml        2014-08-20 17:55:21 UTC (rev 18262)
@@ -52,6 +52,9 @@
     <RESOURCE>
       <CLASS>gate.corpora.export.GateXMLExporter</CLASS>
     </RESOURCE>
+     <RESOURCE>
+      <CLASS>gate.corpora.export.InlineXMLExporter</CLASS>
+    </RESOURCE>
   </CREOLE>
 
   <!-- creole.xml for Controllers -->

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Slashdot TV.  
Video for Nerds.  Stuff that matters.
http://tv.slashdot.org/
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to