Author: reto
Date: Thu Mar  4 15:30:46 2010
New Revision: 919022

URL: http://svn.apache.org/viewvc?rev=919022&view=rev
Log:
CLEREZZA-145: added Smusher doing IFP smushing

Added:
    
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
    
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java

Added: 
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java?rev=919022&view=auto
==============================================================================
--- 
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
 (added)
+++ 
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
 Thu Mar  4 15:30:46 2010
@@ -0,0 +1,210 @@
+/*
+ *  Copyright 2010 reto.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.clerezza.rdf.utils;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.ontologies.OWL;
+import org.apache.clerezza.rdf.ontologies.RDF;
+
+/**
+ * A utility to equate duplicate nodes in an MGraph. Currently only nodes
+ * that share the value of an inverse functional property (IFP) are equated.
+ *
+ * @author reto
+ */
+public class Smusher {
+
+       /**
+        * Smushes mGraph given the ontological facts in tBox. Currently only
+        * one-step IFP smushing is done, i.e. only IFPs are taken into account
+        * and only nodes that have the same node as IFP object in the original
+        * graph are equated. (Calling the method a second time might lead to
+        * additional smushings.)
+        * <p>
+        * The graph is modified in place: every triple mentioning a node that
+        * has been equated with another one is replaced by a triple using the
+        * chosen replacement node. If several UriRefs are equated, owl:sameAs
+        * statements linking them to the chosen one are added to mGraph.
+        *
+        * @param mGraph the graph to be smushed, it is modified by this method
+        * @param tBox the triples declaring which properties are instances
+        *        of owl:InverseFunctionalProperty
+        */
+       public static void smush(MGraph mGraph, TripleCollection tBox) {
+               final Set<UriRef> ifps = getIfps(tBox);
+               // group the subjects by the (ifp, object) pair describing them
+               final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap =
+                               new HashMap<PredicateObject, Set<NonLiteral>>();
+               for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
+                       final Triple triple = it.next();
+                       final UriRef predicate = triple.getPredicate();
+                       if (!ifps.contains(predicate)) {
+                               continue;
+                       }
+                       final PredicateObject po =
+                                       new PredicateObject(predicate, triple.getObject());
+                       Set<NonLiteral> equivalentNodes = ifp2nodesMap.get(po);
+                       if (equivalentNodes == null) {
+                               equivalentNodes = new HashSet<NonLiteral>();
+                               ifp2nodesMap.put(po, equivalentNodes);
+                       }
+                       equivalentNodes.add(triple.getSubject());
+               }
+               final Set<Set<NonLiteral>> unitedEquivalenceSets =
+                               uniteEquivalenceSets(ifp2nodesMap.values());
+               // choose one replacement node per equivalence set
+               final Map<NonLiteral, NonLiteral> current2ReplacementMap =
+                               new HashMap<NonLiteral, NonLiteral>();
+               final MGraph owlSameAsGraph = new SimpleMGraph();
+               for (Set<NonLiteral> equivalenceSet : unitedEquivalenceSets) {
+                       if (equivalenceSet.size() < 2) {
+                               // a node that is only equivalent to itself is left alone
+                               continue;
+                       }
+                       final NonLiteral replacement =
+                                       getReplacementFor(equivalenceSet, owlSameAsGraph);
+                       for (NonLiteral current : equivalenceSet) {
+                               if (!current.equals(replacement)) {
+                                       current2ReplacementMap.put(current, replacement);
+                               }
+                       }
+               }
+               // the rewritten triples are collected and added after the
+               // iteration, so the graph is not added to while iterating over it
+               final Set<Triple> newTriples = new HashSet<Triple>();
+               for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
+                       final Triple triple = it.next();
+                       final NonLiteral subject = triple.getSubject();
+                       NonLiteral subjectReplacement =
+                                       current2ReplacementMap.get(subject);
+                       final Resource object = triple.getObject();
+                       @SuppressWarnings("element-type-mismatch")
+                       Resource objectReplacement =
+                                       current2ReplacementMap.get(object);
+                       if ((subjectReplacement != null)
+                                       || (objectReplacement != null)) {
+                               it.remove();
+                               if (subjectReplacement == null) {
+                                       subjectReplacement = subject;
+                               }
+                               if (objectReplacement == null) {
+                                       objectReplacement = object;
+                               }
+                               newTriples.add(new TripleImpl(subjectReplacement,
+                                               triple.getPredicate(), objectReplacement));
+                       }
+               }
+               for (Triple triple : newTriples) {
+                       mGraph.add(triple);
+               }
+               mGraph.addAll(owlSameAsGraph);
+       }
+
+       /**
+        * Collects the properties declared to be inverse functional.
+        *
+        * @param tBox the triples to search for IFP declarations
+        * @return the UriRefs typed as owl:InverseFunctionalProperty in tBox
+        */
+       private static Set<UriRef> getIfps(TripleCollection tBox) {
+               final Iterator<Triple> ifpDefinitions = tBox.filter(null,
+                               RDF.type, OWL.InverseFunctionalProperty);
+               final Set<UriRef> ifps = new HashSet<UriRef>();
+               while (ifpDefinitions.hasNext()) {
+                       final NonLiteral subject = ifpDefinitions.next().getSubject();
+                       // a predicate is always a UriRef, so a BNode typed as IFP
+                       // can never match and is skipped instead of failing the cast
+                       if (subject instanceof UriRef) {
+                               ifps.add((UriRef) subject);
+                       }
+               }
+               return ifps;
+       }
+
+       /**
+        * Chooses the node that replaces all members of an equivalence set.
+        * If the set contains no UriRef a new BNode is returned, otherwise one
+        * of the UriRefs is returned and for every other UriRef in the set an
+        * owl:sameAs triple pointing to the returned one is added to
+        * owlSameAsGraph.
+        *
+        * @param equivalenceSet the nodes considered to be the same resource
+        * @param owlSameAsGraph the graph to which owl:sameAs triples are added
+        * @return the node to be used instead of the members of the set
+        */
+       private static NonLiteral getReplacementFor(
+                       Set<NonLiteral> equivalenceSet, MGraph owlSameAsGraph) {
+               final Set<UriRef> uriRefs = new HashSet<UriRef>();
+               for (NonLiteral nonLiteral : equivalenceSet) {
+                       if (nonLiteral instanceof UriRef) {
+                               uriRefs.add((UriRef) nonLiteral);
+                       }
+               }
+               if (uriRefs.isEmpty()) {
+                       return new BNode();
+               }
+               final Iterator<UriRef> uriRefIter = uriRefs.iterator();
+               //instead of an arbitrary one we might either decide
+               //lexicographically or look at their frequency in mGraph
+               final UriRef first = uriRefIter.next();
+               while (uriRefIter.hasNext()) {
+                       final UriRef uriRef = uriRefIter.next();
+                       owlSameAsGraph.add(new TripleImpl(uriRef, OWL.sameAs, first));
+               }
+               return first;
+       }
+
+       /**
+        * Merges all sets that directly or indirectly share a node, e.g. the
+        * sets {a, b} and {b, c} are united to {a, b, c}. The returned sets are
+        * pairwise disjoint, so every node has exactly one equivalence set.
+        *
+        * @param originalSets the possibly overlapping sets of equivalent nodes
+        * @return the disjoint unions of the overlapping original sets
+        */
+       private static Set<Set<NonLiteral>> uniteEquivalenceSets(
+                       Collection<Set<NonLiteral>> originalSets) {
+               // maps every node seen so far to the one united set containing it
+               final Map<NonLiteral, Set<NonLiteral>> node2UnitedSet =
+                               new HashMap<NonLiteral, Set<NonLiteral>>();
+               for (Set<NonLiteral> originalSet : originalSets) {
+                       final Set<NonLiteral> united =
+                                       new HashSet<NonLiteral>(originalSet);
+                       for (NonLiteral node : originalSet) {
+                               final Set<NonLiteral> existing = node2UnitedSet.get(node);
+                               if (existing != null) {
+                                       // absorb the set this node already belongs to
+                                       united.addAll(existing);
+                               }
+                       }
+                       // re-point all members, absorbed sets become unreferenced
+                       for (NonLiteral node : united) {
+                               node2UnitedSet.put(node, united);
+                       }
+               }
+               return new HashSet<Set<NonLiteral>>(node2UnitedSet.values());
+       }
+
+       /**
+        * The combination of a predicate and an object, used as map key to
+        * find the subjects sharing the same value for an IFP.
+        */
+       static class PredicateObject {
+
+               final UriRef predicate;
+               final Resource object;
+
+               public PredicateObject(UriRef predicate, Resource object) {
+                       this.predicate = predicate;
+                       this.object = object;
+               }
+
+               /**
+                * Two instances are equal if both their predicates and their
+                * objects are equal, they need not be the same instances.
+                */
+               @Override
+               public boolean equals(Object obj) {
+                       if (this == obj) {
+                               return true;
+                       }
+                       if ((obj == null) || (getClass() != obj.getClass())) {
+                               return false;
+                       }
+                       final PredicateObject other = (PredicateObject) obj;
+                       return this.predicate.equals(other.predicate)
+                                       && this.object.equals(other.object);
+               }
+
+               @Override
+               public int hashCode() {
+                       int hash = 3;
+                       hash = 29 * hash + this.predicate.hashCode();
+                       hash = 13 * hash + this.object.hashCode();
+                       return hash;
+               }
+       }
+}

Added: 
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java?rev=919022&view=auto
==============================================================================
--- 
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
 (added)
+++ 
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
 Thu Mar  4 15:30:46 2010
@@ -0,0 +1,118 @@
+/*
+ *  Copyright 2010 reto.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.clerezza.rdf.utils;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.clerezza.rdf.ontologies.FOAF;
+import org.apache.clerezza.rdf.ontologies.OWL;
+import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.ontologies.RDFS;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests the smushing of nodes sharing the value of an inverse functional
+ * property, using foaf:mbox as the only IFP.
+ *
+ * @author reto
+ */
+public class IfpSmushTest {
+
+       /** The ontology declaring foaf:mbox to be an IFP. */
+       private MGraph ontology = new SimpleMGraph();
+       {
+               ontology.add(new TripleImpl(FOAF.mbox, RDF.type,
+                               OWL.InverseFunctionalProperty));
+       }
+
+       /**
+        * Two BNodes with the same mbox become one node: a single mbox
+        * triple and the two comments remain.
+        */
+       @Test
+       public void simpleBNode()  {
+               MGraph mGraph = new SimpleMGraph();
+               UriRef mbox1 = new UriRef("mailto:foo@example.org");
+               final BNode bNode1 = new BNode();
+               mGraph.add(new TripleImpl(bNode1, FOAF.mbox, mbox1));
+               mGraph.add(new TripleImpl(bNode1, RDFS.comment,
+                               new PlainLiteralImpl("a comment")));
+               final BNode bNode2 = new BNode();
+               mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox1));
+               mGraph.add(new TripleImpl(bNode2, RDFS.comment,
+                               new PlainLiteralImpl("another comment")));
+               Smusher.smush(mGraph, ontology);
+               Assert.assertEquals(3, mGraph.size());
+       }
+
+       /**
+        * bNode1 and bNode2 share mbox1, bNode2 and bNode3 share mbox2, so
+        * all three become one node: two mbox triples and three comments
+        * remain.
+        */
+       @Test
+       public void overlappingEquivalenceClasses()  {
+               MGraph mGraph = new SimpleMGraph();
+               UriRef mbox1 = new UriRef("mailto:foo@example.org");
+               final BNode bNode1 = new BNode();
+               mGraph.add(new TripleImpl(bNode1, FOAF.mbox, mbox1));
+               mGraph.add(new TripleImpl(bNode1, RDFS.comment,
+                               new PlainLiteralImpl("a comment")));
+               final BNode bNode2 = new BNode();
+               UriRef mbox2 = new UriRef("mailto:bar@example.org");
+               mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox1));
+               mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox2));
+               mGraph.add(new TripleImpl(bNode2, RDFS.comment,
+                               new PlainLiteralImpl("another comment")));
+               final BNode bNode3 = new BNode();
+               mGraph.add(new TripleImpl(bNode3, FOAF.mbox, mbox2));
+               mGraph.add(new TripleImpl(bNode3, RDFS.comment,
+                               new PlainLiteralImpl("yet another comment")));
+               Smusher.smush(mGraph, ontology);
+               Assert.assertEquals(5, mGraph.size());
+       }
+
+       /**
+        * A UriRef and a BNode with the same mbox are merged into the
+        * UriRef: one mbox triple and the two comments remain.
+        */
+       @Test
+       public void oneUriRef()  {
+               MGraph mGraph = new SimpleMGraph();
+               UriRef mbox1 = new UriRef("mailto:foo@example.org");
+               final UriRef resource = new UriRef("http://example.org/");
+               mGraph.add(new TripleImpl(resource, FOAF.mbox, mbox1));
+               mGraph.add(new TripleImpl(resource, RDFS.comment,
+                               new PlainLiteralImpl("a comment")));
+               final BNode bNode2 = new BNode();
+               mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox1));
+               mGraph.add(new TripleImpl(bNode2, RDFS.comment,
+                               new PlainLiteralImpl("another comment")));
+               Smusher.smush(mGraph, ontology);
+               Assert.assertEquals(3, mGraph.size());
+       }
+
+       /**
+        * Two UriRefs with the same mbox are merged into one of them and an
+        * owl:sameAs triple is added: one mbox triple, two comments and the
+        * owl:sameAs triple remain.
+        */
+       @Test
+       public void twoUriRefs()  {
+               MGraph mGraph = new SimpleMGraph();
+               UriRef mbox1 = new UriRef("mailto:foo@example.org");
+               final UriRef resource1 = new UriRef("http://example.org/");
+               mGraph.add(new TripleImpl(resource1, FOAF.mbox, mbox1));
+               mGraph.add(new TripleImpl(resource1, RDFS.comment,
+                               new PlainLiteralImpl("a comment")));
+               final UriRef resource2 = new UriRef("http://2.example.org/");
+               mGraph.add(new TripleImpl(resource2, FOAF.mbox, mbox1));
+               mGraph.add(new TripleImpl(resource2, RDFS.comment,
+                               new PlainLiteralImpl("another comment")));
+               Smusher.smush(mGraph, ontology);
+               Assert.assertEquals(4, mGraph.size());
+       }
+
+}


Reply via email to