Revision: 15980
http://gate.svn.sourceforge.net/gate/?rev=15980&view=rev
Author: valyt
Date: 2012-07-25 10:48:42 +0000 (Wed, 25 Jul 2012)
Log Message:
-----------
Semantic annotation helpers get an extra method to check if a mention URI is
theirs or not.
This is now used for term queries against annotation indexes (where it's
possible that more than one annotation helper is contributing mention URIs).
This allows term queries to only return terms that refer to the actual
annotation type requested in the query.
Modified Paths:
--------------
mimir/trunk/mimir-core/src/gate/mimir/SemanticAnnotationHelper.java
mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractIndexTermsQuery.java
mimir/trunk/mimir-core/src/gate/mimir/util/DelegatingSemanticAnnotationHelper.java
Modified: mimir/trunk/mimir-core/src/gate/mimir/SemanticAnnotationHelper.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/SemanticAnnotationHelper.java	2012-07-25 10:45:23 UTC (rev 15979)
+++ mimir/trunk/mimir-core/src/gate/mimir/SemanticAnnotationHelper.java	2012-07-25 10:48:42 UTC (rev 15980)
@@ -143,9 +143,27 @@
* call.
* @return a textual representation of the specified mention.
*/
- public String describeMention(String mentionUri) ;
+ public String describeMention(String mentionUri);
/**
+ * Checks whether the supplied string <strong>looks like</strong> a valid
+ * mention URI that may have been returned by a call to
+ * {@link #getMentions(String, List, QueryEngine)} or
+ * {@link #getMentions(String, Map, QueryEngine)}.
+ *
+ * Note that this is a superficial test that may be able to distinguish a URI
+ * produced by this helper from one produced by another. It will not actually
+ * access any data structure to check that the URI really is valid. The main
+ * use case for this call is to distinguish different URIs indexed in the same
+ * annotations index, but produced by different helpers.
+ *
+ * @param mentionUri the URI to test.
+ * @return <code>true</code> if this URI looks like a URI produced by this
+ * helper.
+ */
+ public boolean isMentionUri(String mentionUri);
+
+ /**
* Closes this annotation helper. Implementers should perform maintenance
* operations (such as closing connections to ORDI, etc) on this call.
*/
Modified: mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractIndexTermsQuery.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractIndexTermsQuery.java	2012-07-25 10:45:23 UTC (rev 15979)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractIndexTermsQuery.java	2012-07-25 10:48:42 UTC (rev 15980)
@@ -39,7 +39,7 @@
* index includes multiple sub-indexes (some storing tokens, other storing
* annotations), identified by a name. For token indexes, the index name is
* the name of the token feature being indexed; for annotation indexes, the
- * index name is the annotation type.
+ * index name is the annotation type.
*/
protected final String indexName;
@@ -60,8 +60,9 @@
protected IndexReaderPool indirectIndexPool;
/**
- * If {@link #indexType} is {@link IndexType#ANNOTATIONS}, this holds a
- * reference to the annotation helper.
+ * The semantic annotation helper for the correct annotation type (as
+ * given by {@link #indexName}), if {@link #indexType} is
+ * {@link IndexType#ANNOTATIONS}, <code>null</code> otherwise.
*/
protected SemanticAnnotationHelper annotationHelper;
@@ -130,7 +131,21 @@
"yours", "yourself", "yourselves"
};
-
+ /**
+ *
+ * @param indexName The name of the subindex in which the terms are sought.
+ * Each Mímir index includes multiple sub-indexes (some storing tokens,
+ * other storing annotations), identified by a name. For token indexes,
+ * the index name is the name of the token feature being indexed; for
+ * annotation indexes, the index name is the annotation type.
+ * @param indexType The type of index to be searched (tokens or annotations).
+ *
+ * @param stringsEnabled should term strings be obtained?
+ *
+ * @param countsEnabled should term counts be obtained?
+ *
+ * @param limit the maximum number of terms to return.
+ */
public AbstractIndexTermsQuery(String indexName, IndexType indexType,
boolean stringsEnabled, boolean countsEnabled, int limit) {
super(stringsEnabled, countsEnabled, limit);
@@ -199,31 +214,41 @@
}
long termId = documentIterator.nextDocument();
- while(termId != DocumentIterator.END_OF_LIST && termId != -1 &&
+ terms:while(termId != DocumentIterator.END_OF_LIST && termId != -1 &&
termIds.size() < limit) {
String termString = null;
- if(isStopWordsBlocked() || stringsEnabled) {
+ // get the term string, if required
+ if(// if stop words are blocked, we need to check the term string
+ isStopWordsBlocked() ||
+ // if strings enabled, we need the term string so we can return it
+ stringsEnabled ||
+ // if annotation index, we need the term to check for the right
+ // annotation type inside the index (which may include multiple types)
+ indexType == IndexType.ANNOTATIONS) {
termString = indirectIndexPool.getTerm(termId);
}
if(stopWordsBlocked && stopWordsSet.contains(termString)) {
- // do nothing
- } else {
- termIds.add(termId);
- if(countsEnabled){
- counterSetupVisitor.clear();
- documentIterator.acceptOnTruePaths( counterCollectionVisitor );
- int count = 0;
- for (int aCount : counterSetupVisitor.count ) count += aCount;
- termCounts.add(count);
- }
- if(stringsEnabled){
- if(indexType == IndexType.ANNOTATIONS) {
- // describe the term
- termString = annotationHelper.describeMention(termString);
- }
- termStrings.add(termString);
- }
+ // skip this term
+ termId = documentIterator.nextDocument();
+ continue terms;
}
+ if(indexType == IndexType.ANNOTATIONS &&
+ (!annotationHelper.isMentionUri(termString))){
+ // skip this term
+ termId = documentIterator.nextDocument();
+ continue terms;
+ }
+ termIds.add(termId);
+ if(countsEnabled){
+ counterSetupVisitor.clear();
+ documentIterator.acceptOnTruePaths( counterCollectionVisitor );
+ int count = 0;
+ for (int aCount : counterSetupVisitor.count ) count += aCount;
+ termCounts.add(count);
+ }
+ if(stringsEnabled){
+ termStrings.add(termString);
+ }
termId = documentIterator.nextDocument();
}
// construct the result
Modified: mimir/trunk/mimir-core/src/gate/mimir/util/DelegatingSemanticAnnotationHelper.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/util/DelegatingSemanticAnnotationHelper.java	2012-07-25 10:45:23 UTC (rev 15979)
+++ mimir/trunk/mimir-core/src/gate/mimir/util/DelegatingSemanticAnnotationHelper.java	2012-07-25 10:48:42 UTC (rev 15980)
@@ -213,7 +213,24 @@
return delegate.getMentions(annotationType, constraints, engine);
}
+
+ /* (non-Javadoc)
+ * @see gate.mimir.AbstractSemanticAnnotationHelper#describeMention(java.lang.String)
+ */
@Override
+ public String describeMention(String mentionUri) {
+ return delegate.describeMention(mentionUri);
+ }
+
+ /* (non-Javadoc)
+ * @see gate.mimir.SemanticAnnotationHelper#isMentionUri(java.lang.String)
+ */
+ @Override
+ public boolean isMentionUri(String mentionUri) {
+ return delegate.isMentionUri(mentionUri);
+ }
+
+ @Override
public void documentEnd() {
delegate.documentEnd();
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs