Author: rwesten
Date: Tue Jan 22 18:19:35 2013
New Revision: 1437077
URL: http://svn.apache.org/viewvc?rev=1437077&view=rev
Log:
CLEREZZA-683: Implemented functionality that should correctly handle hash code
conflicts for BNodes. The assumption was that such conflicts are the reason why
sometimes iterators do return a different number of triples as for the
SimpleMGraph implementation. As this is not reproduceable with the Mac JavaVM
we will need to wait for some time to see if the code works as expected
Modified:
stanbol/trunk/commons/indexedgraph/src/main/java/org/apache/stanbol/commons/indexedgraph/IndexedTripleCollection.java
stanbol/trunk/commons/indexedgraph/src/test/java/org/apache/stanbol/commons/indexedgraph/IndexedGraphTest.java
Modified:
stanbol/trunk/commons/indexedgraph/src/main/java/org/apache/stanbol/commons/indexedgraph/IndexedTripleCollection.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/commons/indexedgraph/src/main/java/org/apache/stanbol/commons/indexedgraph/IndexedTripleCollection.java?rev=1437077&r1=1437076&r2=1437077&view=diff
==============================================================================
---
stanbol/trunk/commons/indexedgraph/src/main/java/org/apache/stanbol/commons/indexedgraph/IndexedTripleCollection.java
(original)
+++
stanbol/trunk/commons/indexedgraph/src/main/java/org/apache/stanbol/commons/indexedgraph/IndexedTripleCollection.java
Tue Jan 22 18:19:35 2013
@@ -16,13 +16,18 @@
*/
package org.apache.stanbol.commons.indexedgraph;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
+import java.util.HashMap;
import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
import java.util.NavigableSet;
import java.util.SortedSet;
import java.util.TreeSet;
+import org.apache.clerezza.rdf.core.BNode;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
@@ -30,6 +35,8 @@ import org.apache.clerezza.rdf.core.Trip
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.AbstractTripleCollection;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* {@link TripleCollection} implementation that uses indexes for <ul>
@@ -38,8 +45,8 @@ import org.apache.clerezza.rdf.core.impl
* <li> object, subject, predicate [OSP]
* </ul>
* Indexes are maintained in {@link TreeSet}s with according {@link Comparator}
- * instances ({@link #SPO_COMPARATOR}, {@link #POS_COMPARATOR} ,
- * {@link #OSP_COMPARATOR}). {@link Resource}s are compared first using the
+ * instances ({@link #spoComparator}, {@link #posComparator} ,
+ * {@link #ospComparator}). {@link Resource}s are compared first using the
* {@link Resource#hashCode()} and only if this matches by using
* {@link Resource}{@link #toString()}.<p>
* The {@link #filter(NonLiteral, UriRef, Resource)} implementation is based
@@ -53,9 +60,77 @@ import org.apache.clerezza.rdf.core.impl
*/
class IndexedTripleCollection extends AbstractTripleCollection implements
TripleCollection {
- private final NavigableSet<Triple> spo = new
TreeSet<Triple>(SPO_COMPARATOR);
- private final NavigableSet<Triple> pos = new
TreeSet<Triple>(POS_COMPARATOR);
- private final NavigableSet<Triple> osp = new
TreeSet<Triple>(OSP_COMPARATOR);
+ private static final Logger log =
LoggerFactory.getLogger(IndexedTripleCollection.class);
+
+ /**
+ * This map is used to ensure constant ordering for {@link BNode} that do
+ * have the same hashcode (and therefore result to have the same
+ * {@link BNode#toString()} value.
+ */
+ private final Map<Integer,List<Resource>> hashCodeConflictMap = new
HashMap<Integer,List<Resource>>();
+ /**
+ * Compares Triples based on Subject, Predicate, Object
+ */
+ private final Comparator<Triple> spoComparator = new Comparator<Triple>() {
+
+ @Override
+ public int compare(Triple a, Triple b) {
+ int c = IndexedTripleCollection.compare(a.getSubject(),
b.getSubject(), hashCodeConflictMap);
+ if(c == 0){
+ c = IndexedTripleCollection.compare(a.getPredicate(),
b.getPredicate(), hashCodeConflictMap);
+ if(c == 0){
+ c = IndexedTripleCollection.compare(a.getObject(),
b.getObject(), hashCodeConflictMap);
+ }
+ }
+ return c;
+ }
+ };
+ /**
+ * The SPO index
+ */
+ private final NavigableSet<Triple> spo = new
TreeSet<Triple>(spoComparator);
+ /**
+ * Compares Triples based on Predicate, Object, Subject
+ */
+ private final Comparator<Triple> posComparator = new Comparator<Triple>() {
+
+ @Override
+ public int compare(Triple a, Triple b) {
+ int c = IndexedTripleCollection.compare(a.getPredicate(),
b.getPredicate(), hashCodeConflictMap);
+ if(c == 0){
+ c = IndexedTripleCollection.compare(a.getObject(),
b.getObject(), hashCodeConflictMap);
+ if(c == 0){
+ c = IndexedTripleCollection.compare(a.getSubject(),
b.getSubject(), hashCodeConflictMap);
+ }
+ }
+ return c;
+ }
+ };
+ /**
+ * The POS index
+ */
+ private final NavigableSet<Triple> pos = new
TreeSet<Triple>(posComparator);
+ /**
+ * Compares Triples based on Object, Subject, Predicate
+ */
+ private final Comparator<Triple> ospComparator = new Comparator<Triple>() {
+
+ @Override
+ public int compare(Triple a, Triple b) {
+ int c = IndexedTripleCollection.compare(a.getObject(),
b.getObject(), hashCodeConflictMap);
+ if(c == 0){
+ c = IndexedTripleCollection.compare(a.getSubject(),
b.getSubject(), hashCodeConflictMap);
+ if(c == 0){
+ c = IndexedTripleCollection.compare(a.getPredicate(),
b.getPredicate(), hashCodeConflictMap);
+ }
+ }
+ return c;
+ }
+ };
+ /**
+ * The OSP index
+ */
+ private final NavigableSet<Triple> osp = new
TreeSet<Triple>(ospComparator);
/**
* Creates an empty {@link IndexedTripleCollection}
@@ -184,40 +259,6 @@ class IndexedTripleCollection extends Ab
}
- /**
- * Compares Triples based on Subject, Predicate, Object
- */
- public static final Comparator<Triple> SPO_COMPARATOR = new
Comparator<Triple>() {
-
- @Override
- public int compare(Triple a, Triple b) {
- int c = IndexedTripleCollection.compare(a.getSubject(),
b.getSubject());
- if(c == 0){
- c = IndexedTripleCollection.compare(a.getPredicate(),
b.getPredicate());
- if(c == 0){
- c = IndexedTripleCollection.compare(a.getObject(),
b.getObject());
- }
- }
- return c;
- }
- };
- /**
- * Compares Triples based on Predicate, Object, Subject
- */
- public static final Comparator<Triple> POS_COMPARATOR = new
Comparator<Triple>() {
-
- @Override
- public int compare(Triple a, Triple b) {
- int c = IndexedTripleCollection.compare(a.getPredicate(),
b.getPredicate());
- if(c == 0){
- c = IndexedTripleCollection.compare(a.getObject(),
b.getObject());
- if(c == 0){
- c = IndexedTripleCollection.compare(a.getSubject(),
b.getSubject());
- }
- }
- return c;
- }
- };
protected static UriRef MIN = new UriRef("") {
@Override
public int hashCode() {
@@ -233,23 +274,6 @@ class IndexedTripleCollection extends Ab
/**
- * Compares Triples based on Object, Subject, Predicate
- */
- public static final Comparator<Triple> OSP_COMPARATOR = new
Comparator<Triple>() {
-
- @Override
- public int compare(Triple a, Triple b) {
- int c = IndexedTripleCollection.compare(a.getObject(),
b.getObject());
- if(c == 0){
- c = IndexedTripleCollection.compare(a.getSubject(),
b.getSubject());
- if(c == 0){
- c = IndexedTripleCollection.compare(a.getPredicate(),
b.getPredicate());
- }
- }
- return c;
- }
- };
- /**
* Compares two resources with special support for {@link #MIN} and
* {@link #MAX} to allow building {@link SortedSet#subSet(Object, Object)}
* for <code>null</code> values parsed to
@@ -258,14 +282,53 @@ class IndexedTripleCollection extends Ab
* @param b
* @return
*/
- protected static int compare(Resource a, Resource b) {
+ protected static int compare(Resource a, Resource b,
Map<Integer,List<Resource>> confictsMap) {
int hashA = a.hashCode();
int hashB = b.hashCode();
if (hashA != hashB) {
return hashA > hashB ? 1 : -1;
}
- return a == MIN || b == MAX ? -1 :
+ int state = a == MIN || b == MAX ? -1 :
a == MAX || b == MIN ? 1 : a.toString().compareTo(b.toString());
+ if(state == 0 && //same string representation -> should be equals
+ a instanceof BNode && //but for BNodes it might be a hashcode
conflict
+ !a.equals(b)){ //so check if they are not equals
+ log.info("HashCode conflict for {} and {}",a,b); //we have a
conflict
+ //This is not a bad thing. We need just to ensure constant ordering
+ //and as there is nothing we can use to distinguish we need to keep
+ //this information in a list.
+ Integer hash = Integer.valueOf(a.hashCode());
+ List<Resource> resources = confictsMap.get(hash);
+ if(resources == null){ //new conflict ... just add and return
+ resources = new ArrayList<Resource>(2);
+ confictsMap.put(hash, resources);
+ resources.add(a);
+ resources.add(b);
+ return -1;
+ }
+ //already conflicting resource for this hash present
+ int aIndex=-1;
+ int bIndex=-1;
+ for(int i = 0; i<resources.size() && (aIndex < 0 || bIndex <
0);i++){
+ Resource r = resources.get(i);
+ if(aIndex < 0 && r.equals(a)){
+ aIndex = i;
+ }
+ if(bIndex < 0 && r.equals(b)){
+ bIndex = i;
+ }
+ }
+ if(aIndex < 0){ //a not found
+ aIndex = resources.size();
+ resources.add(a);
+ }
+ if(bIndex < 0){ //b not found
+ bIndex = resources.size();
+ resources.add(b);
+ }
+ return aIndex < bIndex ? -1 : 1;
+ } //no conflict or no conflict possible
+ return state;
}
Modified:
stanbol/trunk/commons/indexedgraph/src/test/java/org/apache/stanbol/commons/indexedgraph/IndexedGraphTest.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/commons/indexedgraph/src/test/java/org/apache/stanbol/commons/indexedgraph/IndexedGraphTest.java?rev=1437077&r1=1437076&r2=1437077&view=diff
==============================================================================
---
stanbol/trunk/commons/indexedgraph/src/test/java/org/apache/stanbol/commons/indexedgraph/IndexedGraphTest.java
(original)
+++
stanbol/trunk/commons/indexedgraph/src/test/java/org/apache/stanbol/commons/indexedgraph/IndexedGraphTest.java
Tue Jan 22 18:19:35 2013
@@ -176,7 +176,7 @@ public class IndexedGraphTest extends M
MGraph sg = new SimpleMGraph();
int iterations = 100; //reduced from 1000
int graphsize = 100000;
- Long seed = new Long("1332943407752");//System.currentTimeMillis();
+ Long seed = System.currentTimeMillis();
log.info("Test Seed: {}",seed);
createGraph(sg, graphsize, seed);
MGraph ig = new IndexedMGraph(sg);