Revision: 17228
http://sourceforge.net/p/gate/code/17228
Author: valyt
Date: 2014-01-15 11:23:19 +0000 (Wed, 15 Jan 2014)
Log Message:
-----------
Renamed AtomicMentionsIndex to AtomicAnnotationIndex for better naming
consistency.
Added Paths:
-----------
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicAnnotationIndex.java
Removed Paths:
-------------
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicMentionsIndex.java
Copied:
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicAnnotationIndex.java
(from rev 17225,
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicMentionsIndex.java)
===================================================================
---
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicAnnotationIndex.java
(rev 0)
+++
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicAnnotationIndex.java
2014-01-15 11:23:19 UTC (rev 17228)
@@ -0,0 +1,206 @@
+/*
+ * AtomicAnnotationIndex.java
+ *
+ * Copyright (c) 2007-2014, The University of Sheffield.
+ *
+ * This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
+ * and is free software, licenced under the GNU Lesser General Public License,
+ * Version 3, June 2007 (also included with this distribution as file
+ * LICENCE-LGPL3.html).
+ *
+ * Valentin Tablan, 10 Jan 2014
+ *
+ * $Id$
+ */
+package gate.mimir.index;
+
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.Document;
+import gate.mimir.IndexConfig;
+import gate.mimir.MimirIndex;
+import gate.mimir.SemanticAnnotationHelper;
+import gate.mimir.IndexConfig.SemanticIndexerConfig;
+import gate.mimir.index.mg4j.GATEDocument;
+import gate.util.OffsetComparator;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.BlockingQueue;
+
+import it.unimi.di.big.mg4j.index.Index;
+import it.unimi.di.big.mg4j.index.NullTermProcessor;
+import it.unimi.dsi.lang.ObjectParser;
+
+import org.apache.log4j.Logger;
+
+/**
+ * An {@link AtomicIndex} over semantic annotations and document metadata.
+ */
+public class AtomicAnnotationIndex extends AtomicIndex {
+
+ private final static Logger logger =
Logger.getLogger(AtomicAnnotationIndex.class);
+
+ /**
+ * The {@link IndexConfig} used by the {@link MimirIndex} that contains this
+ * annotation index.
+ */
+ protected IndexConfig indexConfig;
+
+ protected SemanticIndexerConfig semIdxConfid;
+ /**
+ * Helpers for each semantic annotation type.
+ */
+ protected Map<String, SemanticAnnotationHelper> annotationHelpers;
+
+ protected List<SemanticAnnotationHelper> documentHelpers;
+
+ /**
+ * An {@link OffsetComparator} used to sort the annotations by offset before
+ * indexing.
+ */
+ protected OffsetComparator offsetComparator;
+
+ /**
+ * @param parent
+ * @param name
+ * @param indexDirectory
+ * @param hasDirectIndex
+ * @param inputQueue
+ * @param outputQueue
+ */
+ public AtomicAnnotationIndex(MimirIndex parent, String name,
+ File indexDirectory, boolean hasDirectIndex,
+ BlockingQueue<GATEDocument> inputQueue,
+ BlockingQueue<GATEDocument> outputQueue,
+ SemanticIndexerConfig siConfig) {
+ super(parent, name, indexDirectory, hasDirectIndex, inputQueue,
outputQueue);
+ this.semIdxConfid = siConfig;
+ indexConfig = parent.getIndexConfig();
+ //get the helpers
+ annotationHelpers = new HashMap<String, SemanticAnnotationHelper>(
+ siConfig.getAnnotationTypes().length);
+ documentHelpers = new LinkedList<SemanticAnnotationHelper>();
+ for(int i = 0; i < siConfig.getAnnotationTypes().length; i++){
+ SemanticAnnotationHelper theHelper = siConfig.getHelpers()[i];
+ if(theHelper.getMode() == SemanticAnnotationHelper.Mode.DOCUMENT) {
+ documentHelpers.add(theHelper);
+ } else {
+ annotationHelpers.put(siConfig.getAnnotationTypes()[i], theHelper);
+ }
+ theHelper.init(this);
+ }
+ offsetComparator = new OffsetComparator();
+ // create and save the term processor
+ termProcessor = NullTermProcessor.getInstance();
+ additionalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR,
+ ObjectParser.toSpec(termProcessor));
+ // start the indexing thread
+ indexingThread = new Thread(this, "Mimir-" + name + " indexing thread");
+ indexingThread.start();
+ }
+
+ /* (non-Javadoc)
+ * @see
gate.mimir.index.AtomicIndex#getAnnotsToProcess(gate.mimir.index.mg4j.GATEDocument)
+ */
+ @Override
+ protected Annotation[] getAnnotsToProcess(GATEDocument gateDocument)
+ throws IndexException {
+ Document document = gateDocument.getDocument();
+ Annotation[] semanticAnnots;
+ AnnotationSet semAnnSet =
+ (indexConfig.getSemanticAnnotationSetName() == null ||
+ indexConfig.getSemanticAnnotationSetName().length() == 0) ?
+ document.getAnnotations() :
+ document.getAnnotations(indexConfig.getSemanticAnnotationSetName());
+ if(semAnnSet.size() > 0){
+ AnnotationSet semAnns = null;
+ synchronized(semAnnSet) {
+ semAnns = semAnnSet.get(annotationHelpers.keySet());
+ }
+ semanticAnnots = semAnns.toArray(new Annotation[semAnns.size()]);
+ Arrays.sort(semanticAnnots, offsetComparator);
+ }else{
+ semanticAnnots = new Annotation[0];
+ }
+ return semanticAnnots;
+ }
+
+ /* (non-Javadoc)
+ * @see
gate.mimir.index.AtomicIndex#calculateStartPositionForAnnotation(gate.Annotation,
gate.mimir.index.mg4j.GATEDocument)
+ */
+ @Override
+ protected void calculateStartPositionForAnnotation(Annotation ann,
+ GATEDocument gateDocument) throws IndexException {
+ if(ann == null) {
+ // we're supposed to index the document metadata
+ tokenPosition = 0;
+ } else {
+ //calculate the term position for the current semantic annotation
+ while(tokenPosition < gateDocument.getTokenAnnots().length &&
+ gateDocument.getTokenAnnots()[tokenPosition].
+ getEndNode().getOffset().longValue() <=
+ ann.getStartNode().getOffset().longValue()){
+ tokenPosition++;
+ }
+ //check if tokenPosition is valid
+ if(tokenPosition >= gateDocument.getTokenAnnots().length){
+ //malfunction
+ logger.error(
+ "Semantic annotation [Type:" + ann.getType() +
+ ", start: " + ann.getStartNode().getOffset().toString() +
+ ", end: " + ann.getEndNode().getOffset().toString() +
+ "] outside of the tokens area in document" +
+ " URI: " + gateDocument.uri() +
+ " Title: " + gateDocument.title());
+ }
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see
gate.mimir.index.AtomicIndex#calculateTermStringForAnnotation(gate.Annotation,
gate.mimir.index.mg4j.GATEDocument)
+ */
+ @Override
+ protected String[] calculateTermStringForAnnotation(Annotation ann,
+ GATEDocument gateDocument) throws IndexException {
+ if(ann == null) {
+ // obtain the URIs to be indexed for the *document* metadata
+ List<String> terms = new LinkedList<String>();
+ for(SemanticAnnotationHelper aHelper : documentHelpers) {
+ String[] someTerms = aHelper.getMentionUris(null, Mention.NO_LENGTH,
this);
+ if(someTerms != null) {
+ for(String aTerm : someTerms) {
+ terms.add(aTerm);
+ }
+ }
+ }
+ return terms.toArray(new String[terms.size()]);
+ } else {
+ //calculate the annotation length (as number of terms)
+ SemanticAnnotationHelper helper = annotationHelpers.get(ann.getType());
+ int length = 1;
+ while(tokenPosition + length < gateDocument.getTokenAnnots().length &&
+ gateDocument.getTokenAnnots()[tokenPosition + length].
+ getStartNode().getOffset().longValue() <
+ ann.getEndNode().getOffset().longValue()){
+ length++;
+ }
+ //get the annotation URI
+ return helper.getMentionUris(ann, length, this);
+ }
+ }
+
+ @Override
+ protected void flush() throws IOException {
+ for(SemanticAnnotationHelper sah : annotationHelpers.values()) {
+ sah.close(this);
+ }
+ }
+
+
+}
Deleted:
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicMentionsIndex.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicMentionsIndex.java
2014-01-14 17:26:50 UTC (rev 17227)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicMentionsIndex.java
2014-01-15 11:23:19 UTC (rev 17228)
@@ -1,206 +0,0 @@
-/*
- * AtomicMentionsIndex.java
- *
- * Copyright (c) 2007-2014, The University of Sheffield.
- *
- * This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
- * and is free software, licenced under the GNU Lesser General Public License,
- * Version 3, June 2007 (also included with this distribution as file
- * LICENCE-LGPL3.html).
- *
- * Valentin Tablan, 10 Jan 2014
- *
- * $Id$
- */
-package gate.mimir.index;
-
-import gate.Annotation;
-import gate.AnnotationSet;
-import gate.Document;
-import gate.mimir.IndexConfig;
-import gate.mimir.MimirIndex;
-import gate.mimir.SemanticAnnotationHelper;
-import gate.mimir.IndexConfig.SemanticIndexerConfig;
-import gate.mimir.index.mg4j.GATEDocument;
-import gate.util.OffsetComparator;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.BlockingQueue;
-
-import it.unimi.di.big.mg4j.index.Index;
-import it.unimi.di.big.mg4j.index.NullTermProcessor;
-import it.unimi.dsi.lang.ObjectParser;
-
-import org.apache.log4j.Logger;
-
-/**
- *
- */
-public class AtomicMentionsIndex extends AtomicIndex {
-
- private final static Logger logger =
Logger.getLogger(AtomicMentionsIndex.class);
-
- /**
- * The {@link IndexConfig} used by the {@link MimirIndex} that contains this
- * mentions index.
- */
- protected IndexConfig indexConfig;
-
- protected SemanticIndexerConfig semIdxConfid;
- /**
- * Helpers for each semantic annotation type.
- */
- protected Map<String, SemanticAnnotationHelper> annotationHelpers;
-
- protected List<SemanticAnnotationHelper> documentHelpers;
-
- /**
- * An {@link OffsetComparator} used to sort the annotations by offset before
- * indexing.
- */
- protected OffsetComparator offsetComparator;
-
- /**
- * @param parent
- * @param name
- * @param indexDirectory
- * @param hasDirectIndex
- * @param inputQueue
- * @param outputQueue
- */
- public AtomicMentionsIndex(MimirIndex parent, String name,
- File indexDirectory, boolean hasDirectIndex,
- BlockingQueue<GATEDocument> inputQueue,
- BlockingQueue<GATEDocument> outputQueue,
- SemanticIndexerConfig siConfig) {
- super(parent, name, indexDirectory, hasDirectIndex, inputQueue,
outputQueue);
- this.semIdxConfid = siConfig;
- indexConfig = parent.getIndexConfig();
- //get the helpers
- annotationHelpers = new HashMap<String, SemanticAnnotationHelper>(
- siConfig.getAnnotationTypes().length);
- documentHelpers = new LinkedList<SemanticAnnotationHelper>();
- for(int i = 0; i < siConfig.getAnnotationTypes().length; i++){
- SemanticAnnotationHelper theHelper = siConfig.getHelpers()[i];
- if(theHelper.getMode() == SemanticAnnotationHelper.Mode.DOCUMENT) {
- documentHelpers.add(theHelper);
- } else {
- annotationHelpers.put(siConfig.getAnnotationTypes()[i], theHelper);
- }
- theHelper.init(this);
- }
- offsetComparator = new OffsetComparator();
- // create and save the term processor
- termProcessor = NullTermProcessor.getInstance();
- additionalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR,
- ObjectParser.toSpec(termProcessor));
- // start the indexing thread
- indexingThread = new Thread(this, "Mimir-" + name + " indexing thread");
- indexingThread.start();
- }
-
- /* (non-Javadoc)
- * @see
gate.mimir.index.AtomicIndex#getAnnotsToProcess(gate.mimir.index.mg4j.GATEDocument)
- */
- @Override
- protected Annotation[] getAnnotsToProcess(GATEDocument gateDocument)
- throws IndexException {
- Document document = gateDocument.getDocument();
- Annotation[] semanticAnnots;
- AnnotationSet semAnnSet =
- (indexConfig.getSemanticAnnotationSetName() == null ||
- indexConfig.getSemanticAnnotationSetName().length() == 0) ?
- document.getAnnotations() :
- document.getAnnotations(indexConfig.getSemanticAnnotationSetName());
- if(semAnnSet.size() > 0){
- AnnotationSet semAnns = null;
- synchronized(semAnnSet) {
- semAnns = semAnnSet.get(annotationHelpers.keySet());
- }
- semanticAnnots = semAnns.toArray(new Annotation[semAnns.size()]);
- Arrays.sort(semanticAnnots, offsetComparator);
- }else{
- semanticAnnots = new Annotation[0];
- }
- return semanticAnnots;
- }
-
- /* (non-Javadoc)
- * @see
gate.mimir.index.AtomicIndex#calculateStartPositionForAnnotation(gate.Annotation,
gate.mimir.index.mg4j.GATEDocument)
- */
- @Override
- protected void calculateStartPositionForAnnotation(Annotation ann,
- GATEDocument gateDocument) throws IndexException {
- if(ann == null) {
- // we're supposed index the document metadata
- tokenPosition = 0;
- } else {
- //calculate the term position for the current semantic annotation
- while(tokenPosition < gateDocument.getTokenAnnots().length &&
- gateDocument.getTokenAnnots()[tokenPosition].
- getEndNode().getOffset().longValue() <=
- ann.getStartNode().getOffset().longValue()){
- tokenPosition++;
- }
- //check if lastTokenposition is valid
- if(tokenPosition >= gateDocument.getTokenAnnots().length){
- //malfunction
- logger.error(
- "Semantic annotation [Type:" + ann.getType() +
- ", start: " + ann.getStartNode().getOffset().toString() +
- ", end: " + ann.getEndNode().getOffset().toString() +
- "] outside of the tokens area in document" +
- " URI: " + gateDocument.uri() +
- " Title: " + gateDocument.title());
- }
- }
- }
-
- /* (non-Javadoc)
- * @see
gate.mimir.index.AtomicIndex#calculateTermStringForAnnotation(gate.Annotation,
gate.mimir.index.mg4j.GATEDocument)
- */
- @Override
- protected String[] calculateTermStringForAnnotation(Annotation ann,
- GATEDocument gateDocument) throws IndexException {
- if(ann == null) {
- // obtain the URIs to be indexed for the *document* metadata
- List<String> terms = new LinkedList<String>();
- for(SemanticAnnotationHelper aHelper : documentHelpers) {
- String[] someTerms = aHelper.getMentionUris(null, Mention.NO_LENGTH,
this);
- if(someTerms != null) {
- for(String aTerm : someTerms) {
- terms.add(aTerm);
- }
- }
- }
- return terms.toArray(new String[terms.size()]);
- } else {
- //calculate the annotation length (as number of terms)
- SemanticAnnotationHelper helper = annotationHelpers.get(ann.getType());
- int length = 1;
- while(tokenPosition + length < gateDocument.getTokenAnnots().length &&
- gateDocument.getTokenAnnots()[tokenPosition + length].
- getStartNode().getOffset().longValue() <
- ann.getEndNode().getOffset().longValue()){
- length++;
- }
- //get the annotation URI
- return helper.getMentionUris(ann, length, this);
- }
- }
-
- @Override
- protected void flush() throws IOException {
- for(SemanticAnnotationHelper sah : annotationHelpers.values()) {
- sah.close(this);
- }
- }
-
-
-}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
CenturyLink Cloud: The Leader in Enterprise Cloud Services.
Learn Why More Businesses Are Choosing CenturyLink Cloud For
Critical Workloads, Development Environments & Everything In Between.
Get a Quote or Start a Free Trial Today.
http://pubads.g.doubleclick.net/gampad/clk?id=119420431&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs