Author: rwesten
Date: Thu Jul 26 11:35:05 2012
New Revision: 1365952
URL: http://svn.apache.org/viewvc?rev=1365952&view=rev
Log:
added Epoch to the IndexingSource interface. Adapted ChangeSet accordingly
(STANBOL-498).
Added:
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/EpochException.java
Modified:
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/core/src/main/java/org/apache/stanbol/commons/semanticindex/core/store/ChangeSetImpl.java
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/ChangeSet.java
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/IndexingSource.java
Modified:
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/core/src/main/java/org/apache/stanbol/commons/semanticindex/core/store/ChangeSetImpl.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/core/src/main/java/org/apache/stanbol/commons/semanticindex/core/store/ChangeSetImpl.java?rev=1365952&r1=1365951&r2=1365952&view=diff
==============================================================================
---
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/core/src/main/java/org/apache/stanbol/commons/semanticindex/core/store/ChangeSetImpl.java
(original)
+++
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/core/src/main/java/org/apache/stanbol/commons/semanticindex/core/store/ChangeSetImpl.java
Thu Jul 26 11:35:05 2012
@@ -16,17 +16,40 @@
*/
package org.apache.stanbol.commons.semanticindex.core.store;
-import java.util.Set;
+import java.util.Collections;
+import java.util.Iterator;
import org.apache.stanbol.commons.semanticindex.store.ChangeSet;
-import org.apache.stanbol.commons.semanticindex.store.Store;
+import org.apache.stanbol.commons.semanticindex.store.IndexingSource;
public class ChangeSetImpl<Item> implements ChangeSet<Item> {
- private long from;
- private long to;
- private Set<String> changedUris;
- private Store<Item> store;
-
+ private final long from;
+ private final long to;
+ private final long epoch;
+ private final Iterable<String> changedUris;
+ private final IndexingSource<Item> source;
+
+ public ChangeSetImpl(IndexingSource<Item> source,long epoch,long from,
long to, Iterable<String> changed) {
+ if(source == null){
+ throw new IllegalArgumentException("The parsed IndexingSource
MUST NOT be NULL!");
+ }
+ if(from > to){
+ throw new IllegalArgumentException("The pared from revision
MUST NOT be bigger as the to revision!");
+ }
+ if(changed == null){
+ if(to != from){
+ throw new IllegalArgumentException("For empty
ChangeSets from and to revisions MUST BE the same!");
+ }
+ this.changedUris = Collections.emptyList();
+ } else {
+ this.changedUris = changed;
+ }
+ this.epoch = epoch;
+ this.from = from;
+ this.to = to;
+ this.source = source;
+ }
+
@Override
public long fromRevision() {
return from;
@@ -38,28 +61,17 @@ public class ChangeSetImpl<Item> impleme
}
@Override
- public Set<String> changed() {
- return changedUris;
- }
-
- @Override
- public Store<Item> getStore() {
- return store;
- }
-
- public void setFrom(long from) {
- this.from = from;
+ public IndexingSource<Item> getIndexingSource() {
+ return source;
}
- public void setTo(long to) {
- this.to = to;
- }
-
- public void setChangedUris(Set<String> changedUris) {
- this.changedUris = changedUris;
- }
-
- public void setStore(Store<Item> store) {
- this.store = store;
- }
+ @Override
+ public long getEpoch() {
+ return epoch;
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return changedUris.iterator();
+ }
}
Modified:
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/ChangeSet.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/ChangeSet.java?rev=1365952&r1=1365951&r2=1365952&view=diff
==============================================================================
---
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/ChangeSet.java
(original)
+++
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/ChangeSet.java
Thu Jul 26 11:35:05 2012
@@ -16,6 +16,7 @@
*/
package org.apache.stanbol.commons.semanticindex.store;
+import java.util.Iterator;
import java.util.Set;
/**
@@ -32,7 +33,7 @@ import java.util.Set;
* ChangeSet cs;
* do {
* cs = store.changes(revision, batchSize);
- * for(String changed : cs.changed()){
+ * for(String changed : cs){
* ContentItem ci = store.get(changed);
* if(ci == null){
* index.remove(changed);
@@ -44,7 +45,7 @@ import java.util.Set;
* index.persist(cs.fromRevision());
* </pre></code>
*/
-public interface ChangeSet<Item> {
+public interface ChangeSet<Item> extends Iterable<String>{
/**
* The lowest revision number included in this ChangeSet
*
@@ -58,19 +59,26 @@ public interface ChangeSet<Item> {
* @return the highest revision number of this set
*/
long toRevision();
-
+
+ /**
+ * The epoch of this ChangeSet. Revisions are only valid within a given
+ * Epoch. If the {@link IndexingSource} increases the epoch, indexing needs
to start
+ * from scratch (see documentation of {@link IndexingSource} for details).
+ */
+ long getEpoch();
+
/**
- * The read only {@link Set} of changes ContentItems included in this
ChangeSet.
+ * The read only Iterator over the changed items of this ChangeSet
*
* @return the URIs of the changed contentItems included in this ChangeSet
*/
- Set<String> changed();
-
+ public Iterator<String> iterator();
+
/**
- * The reference to the {@link Store} of this {@link ChangeSet}. This
Store can be used to iterate on the
- * changes.
+ * The reference to the {@link IndexingSource} of this {@link ChangeSet}.
+ * This {@link IndexingSource} can be used to iterate on the changes.
*
- * @return
+ * @return the IndexingSource of this {@link ChangeSet}
*/
- Store<Item> getStore();
+ IndexingSource<Item> getIndexingSource();
}
Added:
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/EpochException.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/EpochException.java?rev=1365952&view=auto
==============================================================================
---
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/EpochException.java
(added)
+++
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/EpochException.java
Thu Jul 26 11:35:05 2012
@@ -0,0 +1,22 @@
+package org.apache.stanbol.commons.semanticindex.store;
+
+
+/**
+ * Indicates that the active Epoch of an IndexingSource is different from the
+ * requested one.
+ *
+ */
+public class EpochException extends RuntimeException {
+
+ /**
+ * default serial version UID
+ */
+ private static final long serialVersionUID = 1L;
+
+ public EpochException(IndexingSource<?> source, long activeEpoch, long
requestedEpoch) {
+ super(String.format("The Epoch %s was requested but the %s '%s'
uses %s as active Epoch",
+
requestedEpoch,source.getClass().getSimpleName(),source.getName(),activeEpoch));
+ }
+
+
+}
Modified:
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/IndexingSource.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/IndexingSource.java?rev=1365952&r1=1365951&r2=1365952&view=diff
==============================================================================
---
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/IndexingSource.java
(original)
+++
incubator/stanbol/branches/contenthub-two-layered-structure/commons/semanticindex/servicesapi/src/main/java/org/apache/stanbol/commons/semanticindex/store/IndexingSource.java
Thu Jul 26 11:35:05 2012
@@ -2,9 +2,30 @@ package org.apache.stanbol.commons.seman
import java.util.Map;
+import org.apache.stanbol.commons.semanticindex.index.SemanticIndex;
+
/**
- * Minimal interface required by the Semantic Index as an indexing source.
+ * Minimal interface required by the Semantic Index as an indexing source.<p>
+ *
+ * This interface provides metadata, read-only access to Items as well as
+ * {@link ChangeSet changes} based on epoch and revision.
+ * <p>
+ * An <b>epoch</b> describes a span of time within which changes to Items will
+ * generate new revisions. New epochs are usually triggered if the whole
dataset
+ * of an indexing source is replaced (e.g. if a new data-dump is applied).
Changes
+ * of Items (create, update, delete) will not trigger a new epoch but increase
+ * the revision.<p>
+ * This is intended to be used by {@link SemanticIndex}s as follows:<ul>
+ * <li> In case of a new epoch the semantic index needs to re-index all items
+ * of an indexing source. This is done by calling {@link #changes(long, long,
int)}
+ * with the new {@link #getEpoch() epoch} and {@link Long#MIN_VALUE} as
revision.
+ * <li> Revision changes are applied incrementally by the {@link SemanticIndex}
+ * calling {@link #changes(long, long, int)} with the last processed epoch and
+ * revision. If there are changes the IndexingSource will return a non-empty
+ * {@link ChangeSet}. In case of a new epoch an {@link EpochException} will
+ * be thrown - indicating the need of a complete re-index to the SemanticIndex.
+ * </ul>
*
* @param <Item>
*/
@@ -50,20 +71,47 @@ public interface IndexingSource<Item> {
Item get(String uri) throws StoreException;
/**
- * Requests the next <code>batchSize</code> changes starting from
<code>revision</code>. If there are no
- * more revisions that a {@link ChangeSet} with an empty {@link
ChangeSet#changed()} set. There can be
- * more changes in the results than the given <code>batchSize</code> not
to return a subset of changes
- * regarding a specific revision. For instance, if the batch size is 5,
given revision is 9 and there 15
- * changes regarding revision 10. As a result, there will be 10 changed
items in the returned change set.
- *
+ * The current Epoch used by this indexing source. Newer epochs need to use
+ * higher values.<p>
+ * A change in the Epoch indicates that data provided by an indexing
source
+ * may have completely changed. All revisions of a previous epoch are
invalid.
+ * {@link SemanticIndex}s that use an indexing source are required to
+ * start indexing from scratch.
+ * @return the active Epoch
+ */
+ long getEpoch();
+
+ /**
+ * Requests the next <code>batchSize</code> changes starting from
+ * <code>revision</code> in the context of an <code>epoch</code>. <p>
+ * The following actions are expected based on the result of this
Method:<ul>
+ * <li> An empty {@link ChangeSet} indicates that there are no changes.
+ * Typically the caller will use the same <code>epoch:revision</code> for
another
+ * call at a later time
+ * <li> A non-empty {@link ChangeSet} indicates that there are changes.
+ * Callers are expected to process those {@link ChangeSet} and to
repeatedly
+ * call {@link #changes(long, long, int)} with {@link
ChangeSet#toRevision()}
+ * until an empty {@link ChangeSet} is returned
+ * <li> if the {@link #getEpoch() active Epoch} of the indexing source has
changed
+ * an {@link EpochException} is thrown. This indicates that a complete
re-indexing
+ * of all Entities is required.
+ * <li> {@link StoreException} should trigger another attempt at a later
time
+ * </ul>
+ * @param epoch
+ * The epoch of the passed revision
* @param revision
* Starting revision number for the returned {@link ChangeSet}
* @param batchSize
- * Maximum number of changes to be returned
+ * Maximum number of changes to be returned. The returned
{@link ChangeSet}
+ * will provide information about the revision range for
included
+ * changes.
* @return the {@link ChangeSet} with a maximum of <code>batchSize</code>
changes
* @throws StoreException
* On any error while accessing the store.
+ * @throws EpochException
+ * If the Epoch used by this IndexingSource is different from the
+ * epoch passed in the request
* @see ChangeSet
*/
- ChangeSet<Item> changes(long revision, int batchSize) throws
StoreException;
+ ChangeSet<Item> changes(long epoch, long revision, int batchSize) throws
StoreException, EpochException;
}