Revision: 18412
http://sourceforge.net/p/gate/code/18412
Author: ian_roberts
Date: 2014-10-27 20:28:17 +0000 (Mon, 27 Oct 2014)
Log Message:
-----------
Merging deletion and clustering fixes from trunk, version number to 5.0.1
Modified Paths:
--------------
mimir/branches/5.0/build.properties
mimir/branches/5.0/doc/changes.tex
mimir/branches/5.0/doc/mimir-guide.pdf
mimir/branches/5.0/doc/mimir-version.tex
mimir/branches/5.0/mimir-client/src/gate/mimir/index/MimirConnector.java
mimir/branches/5.0/mimir-cloud/application.properties
mimir/branches/5.0/mimir-core/build/ivy.xml
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java
mimir/branches/5.0/mimir-core/src/gate/mimir/search/RankingQueryRunnerImpl.java
Property Changed:
----------------
mimir/branches/5.0/
mimir/branches/5.0/doc/
mimir/branches/5.0/mimir-core/src/
Index: mimir/branches/5.0
===================================================================
--- mimir/branches/5.0 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0 2014-10-27 20:28:17 UTC (rev 18412)
Property changes on: mimir/branches/5.0
___________________________________________________________________
Modified: svn:mergeinfo
## -1,4 +1,4 ##
/mimir/branches/3.4:14623,14634-14643,14687
/mimir/branches/4.0:15380-15383,15385-15386,15388
/mimir/branches/4.x:14298-14348
-/mimir/trunk:17278,18060,18064
+/mimir/trunk:17278,18060,18064,18183,18271,18409,18411
\ No newline at end of property
Modified: mimir/branches/5.0/build.properties
===================================================================
--- mimir/branches/5.0/build.properties 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/build.properties 2014-10-27 20:28:17 UTC (rev 18412)
@@ -1,4 +1,4 @@
-app.version=5.0
+app.version=5.0.1
mimir-core.dirname=mimir-core
mimir-client.dirname=mimir-client
plugins.dirname=plugins
Index: mimir/branches/5.0/doc
===================================================================
--- mimir/branches/5.0/doc 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/doc 2014-10-27 20:28:17 UTC (rev 18412)
Property changes on: mimir/branches/5.0/doc
___________________________________________________________________
Modified: svn:mergeinfo
## -2,4 +2,4 ##
/mimir/branches/4.0/doc:15380-15383,15385-15386,15388
/mimir/branches/4.1/doc:15778-15818
/mimir/branches/4.x/doc:14298-14348
-/mimir/trunk/doc:18060,18064
+/mimir/trunk/doc:18060,18064,18411
\ No newline at end of property
Modified: mimir/branches/5.0/doc/changes.tex
===================================================================
--- mimir/branches/5.0/doc/changes.tex 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/doc/changes.tex 2014-10-27 20:28:17 UTC (rev 18412)
@@ -1,6 +1,14 @@
This appendix details the main changes in each \Mimir\ release.
+\section{Version 5.0.1 (October 2014)}
+Two critical fixes:
+\begin{itemize}
+ \item Deletion of documents now works correctly, it had been broken in
+ version 5.0
+ \item Fixed clustering logic for multi-batch indexes.
+\end{itemize}
+
\section{Version 5.0 (February 2014)}
\begin{itemize}
\item \Mimir{} indexes are now updateable: new documents can be submitted for
Modified: mimir/branches/5.0/doc/mimir-guide.pdf
===================================================================
(Binary files differ)
Modified: mimir/branches/5.0/doc/mimir-version.tex
===================================================================
--- mimir/branches/5.0/doc/mimir-version.tex 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/doc/mimir-version.tex 2014-10-27 20:28:17 UTC (rev 18412)
@@ -8,4 +8,4 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\newcommand{\mimirversion}{5.0}
\ No newline at end of file
+\newcommand{\mimirversion}{5.0.1}
\ No newline at end of file
Modified: mimir/branches/5.0/mimir-client/src/gate/mimir/index/MimirConnector.java
===================================================================
--- mimir/branches/5.0/mimir-client/src/gate/mimir/index/MimirConnector.java 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/mimir-client/src/gate/mimir/index/MimirConnector.java 2014-10-27 20:28:17 UTC (rev 18412)
@@ -144,7 +144,7 @@
synchronized(this) {
if(doc != null){
- objectOutputStream.writeObject(doc);
+ objectOutputStream.writeUnshared(doc);
}
if(byteBuffer.size() > BYTE_BUFFER_SIZE) {
writeBuffer(); // this will also empty (reset) the buffer
Modified: mimir/branches/5.0/mimir-cloud/application.properties
===================================================================
--- mimir/branches/5.0/mimir-cloud/application.properties 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/mimir-cloud/application.properties 2014-10-27 20:28:17 UTC (rev 18412)
@@ -1,5 +1,5 @@
#Grails Metadata file
-#Tue Jun 10 17:44:39 BST 2014
+#Mon Oct 27 20:23:42 GMT 2014
app.grails.version=2.2.3
app.name=mimir-cloud
-app.version=5.0
+app.version=5.0.1
Modified: mimir/branches/5.0/mimir-core/build/ivy.xml
===================================================================
--- mimir/branches/5.0/mimir-core/build/ivy.xml 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/mimir-core/build/ivy.xml 2014-10-27 20:28:17 UTC (rev 18412)
@@ -3,7 +3,7 @@
<info
organisation="uk.ac.gate"
module="mimir-core"
- revision="5.0">
+ revision="5.0.1">
<description homepage="http://gate.ac.uk/mimir" />
</info>
Index: mimir/branches/5.0/mimir-core/src
===================================================================
--- mimir/branches/5.0/mimir-core/src 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/mimir-core/src 2014-10-27 20:28:17 UTC (rev 18412)
Property changes on: mimir/branches/5.0/mimir-core/src
___________________________________________________________________
Modified: svn:mergeinfo
## -1,3 +1,4 ##
/mimir/branches/3.4/mimir-core/src:14623,14634-14643
/mimir/branches/4.0/mimir-core/src:15380-15383,15385-15386,15388
/mimir/branches/4.x/mimir-core/src:14299-14316
+/mimir/trunk/mimir-core/src:18271,18409
\ No newline at end of property
Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java 2014-10-27 20:28:17 UTC (rev 18412)
@@ -454,7 +454,10 @@
// prepare the documental cluster
Index[] indexes = new Index[batches.size()];
- long[] cutPoints = new long[indexes.length];
+ // cut points between the batches - there are numBatches+1 cutpoints,
+ // cutPoints[0] is always zero, and cutPoints[i] is the sum of the
+ // sizes of batches 0 to i-1 inclusive
+ long[] cutPoints = new long[indexes.length + 1];
cutPoints[0] = 0;
int numberOfTerms = -1;
int numberOfDocuments = -1;
@@ -468,10 +471,8 @@
for(MG4JIndex aSubIndex : batches) {
indexes[indexIdx] = aSubIndex.invertedIndex;
- if(indexIdx < cutPoints.length - 1) {
- cutPoints[indexIdx + 1] = cutPoints[indexIdx] +
- aSubIndex.invertedIndex.numberOfDocuments;
- }
+ cutPoints[indexIdx + 1] = cutPoints[indexIdx] +
+ aSubIndex.invertedIndex.numberOfDocuments;
numberOfTerms += aSubIndex.invertedIndex.numberOfTerms;
numberOfDocuments += aSubIndex.invertedIndex.numberOfDocuments;
numberOfPostings += aSubIndex.invertedIndex.numberOfPostings;
Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/search/RankingQueryRunnerImpl.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/search/RankingQueryRunnerImpl.java 2014-10-27 20:20:36 UTC (rev 18411)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/search/RankingQueryRunnerImpl.java 2014-10-27 20:28:17 UTC (rev 18412)
@@ -139,6 +139,9 @@
docIndex = (documentIndexes != null ?
documentIndexes[(int)(i - start)] : i);
docId = documentIds.getLong(docIndex);
+ // don't need to check for deletion here as we know for sure that this
+ // doc ID is ok. The only exception would be if it was deleted since
+ // this query was originally issued, but I think we can live with that
long newDoc = queryExecutor.nextDocument(docId - 1);
// sanity check
if(newDoc == docId) {
@@ -201,7 +204,7 @@
try{
// collect all documents and their scores
if(ranking) scorer.wrap(queryExecutor);
- long docId = ranking ? scorer.nextDocument(-1) : queryExecutor.nextDocument(-1);
+ long docId = nextNotDeleted();
while(docId >= 0) {
// enlarge the hits list
if(ranking){
@@ -223,7 +226,7 @@
}
// and store the new doc ID
documentIds.add(docId);
- docId = ranking ? scorer.nextDocument(-1) : queryExecutor.nextDocument(-1);
+ docId = nextNotDeleted();
}
allDocIdsCollected = true;
if(ranking) {
@@ -734,4 +737,19 @@
}
}
}
+
+ /**
+ * Find the next document ID for the current query executor which is not
+ * marked as deleted in the index.
+ */
+ protected long nextNotDeleted() throws IOException {
+ long docId = ranking ? scorer.nextDocument(-1)
+ : queryExecutor.nextDocument(-1);
+ while(docId >= 0 && queryEngine.getIndex().isDeleted(docId)) {
+ docId = ranking ? scorer.nextDocument(-1)
+ : queryExecutor.nextDocument(-1);
+ }
+
+ return docId;
+ }
}
\ No newline at end of file
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs