Author: mir
Date: Tue Mar 2 15:46:53 2010
New Revision: 918082
URL: http://svn.apache.org/viewvc?rev=918082&view=rev
Log:
CLEREZZA-20: made stable serializer bundle.
Added:
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/
(with props)
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/jena/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/jena/serializer/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/jena/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/jena/serializer/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/target/
Propchange:
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Mar 2 15:46:53 2010
@@ -0,0 +1 @@
+target
Added:
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml?rev=918082&view=auto
==============================================================================
---
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml
(added)
+++
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/pom.xml
Tue Mar 2 15:46:53 2010
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><project
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>org.apache.clerezza.parent</artifactId>
+ <groupId>org.apache.clerezza</groupId>
+ <version>0.2-incubating-SNAPSHOT</version>
+ </parent>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>org.apache.clerezza.rdf.stable.serializer</artifactId>
+ <packaging>bundle</packaging>
+ <version>0.1-incubating-SNAPSHOT</version>
+ <name>Clerezza - Stable Serializer</name>
+ <description>A SerializingProvider that produces a stable output. This means
+ that if the graph to be serialized changes a little, then the output also
+ changes only a little.</description>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>org.apache.clerezza.rdf.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+
<artifactId>org.apache.felix.scr.annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+
<artifactId>org.apache.clerezza.rdf.jena.serializer</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.4</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
Added:
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java?rev=918082&view=auto
==============================================================================
---
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java
(added)
+++
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProvider.java
Tue Mar 2 15:46:53 2010
@@ -0,0 +1,421 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.clerezza.rdf.stable.serializer;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.StringReader;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Vector;
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.serializedform.SerializingProvider;
+import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/*
+ * Credits:
+ *
+ * Blank node labeling algorithm by Jeremy J. Carroll (see "Signing RDF
Graphs",
+ * HP technical report 2003)
+ *
+ * Minimum Self-contained Graph (MSG) decomposition algorithm by
+ * Giovanni Tummarello, Christian Morbidoni, Paolo Puliti, Francesco Piazza,
+ * Università Politecnica delle Marche, Italy
+ * (see "Signing individual fragments of an RDF graph", 14th International
+ * World Wide Web Conference WWW2005, Poster track, May 2005, Chiba, Japan)
+ */
+
+/**
+ * A {@link org.apache.clerezza.rdf.core.serializedform.SerializingProvider}
+ * that tries to provide similar results when serializing graphs. Specifically
+ * it tries to label blank nodes deterministically with reasonable complexity.
+ *
+ * This serializer does not guarantee a deterministic result but it may minimize
+ * the amount of modified lines in serialized output.
+ *
+ * @author Daniel Spicar ([email protected])
+ */
+...@component
+...@service(SerializingProvider.class)
+...@supportedformat({SupportedFormat.N_TRIPLE})
+public class StableSerializerProvider implements SerializingProvider {
+
+ @Property(description="Specifies maximum ammount of blank node " +
+ "labeling recursions, may increase performance at the expense of
stability " +
+ "(0 = no limit).", intValue=0)
+ public static final String MAX_LABELING_ITERATIONS =
"max_labeling_iterations";
+
+ public static final String PARSER_FILTER =
+ "(supportedFormat=" + SupportedFormat.N3 +")";
+
+ @Reference(target=PARSER_FILTER)
+ SerializingProvider serializer;
+
+ private int maxLabelingIterations = -1;
+
+ private final Logger logger = LoggerFactory.getLogger(getClass());
+
+ /**
+  * Reads the {@link #MAX_LABELING_ITERATIONS} setting from the component
+  * properties when the component is activated.
+  *
+  * @param cCtx the component context whose properties are consulted
+  */
+ protected void activate(ComponentContext cCtx) {
+     Object configured = cCtx.getProperties().get(MAX_LABELING_ITERATIONS);
+     if (configured instanceof Number) {
+         maxLabelingIterations = ((Number) configured).intValue();
+     } else {
+         // A missing value used to fail with a NullPointerException on
+         // unboxing and a non-Integer value with a ClassCastException;
+         // keep the current setting instead of aborting activation.
+         logger.warn("Property {} is not set to a number, keeping {}",
+                 MAX_LABELING_ITERATIONS, maxLabelingIterations);
+     }
+     logger.info("StableSerializerProvider activated");
+ }
+
+ @Override
+ public void serialize(OutputStream os, TripleCollection tc,
+ String formatIdentifier) {
+
+ try {
+ List<String> lines = new Vector<String>();
+
+ for (TripleCollection msg : decomposeGraphToMSGs(tc)) {
+ ByteArrayOutputStream baos = new
ByteArrayOutputStream();
+
+ serializer.
+ serialize(baos, msg,
formatIdentifier);
+ BufferedReader serializedGraph =
+ new BufferedReader(new
StringReader(baos.toString()));
+ lines.addAll(labelBlankNodes(serializedGraph,
+ getMDHexString(msg, "MD5")));
+ }
+ Collections.sort(lines);
+ for (String l : lines) {
+ os.write((l + "\n").getBytes());
+ }
+ } catch (IOException ex) {
+ logger.error("Exception while parsing serilaized graph:
{}", ex);
+ } catch (NoSuchAlgorithmException ex) {
+ logger.error("Exception while trying to generate graph
hash: {}", ex);
+ }
+ }
+
+ /**
+  * Splits a copy of the given triples into a list of sub-graphs.
+  *
+  * The argument itself is not modified; the work is done on a copy from
+  * which {@code fillMSG} removes every triple it assigns to a sub-graph.
+  *
+  * @param tc the triples to decompose
+  * @return the sub-graphs, in the order in which they were built
+  */
+ private List<TripleCollection> decomposeGraphToMSGs(TripleCollection tc) {
+     TripleCollection remaining = new SimpleMGraph();
+     remaining.addAll(tc);
+
+     List<TripleCollection> components = new Vector<TripleCollection>();
+     while (!remaining.isEmpty()) {
+         Triple seed = remaining.iterator().next();
+         TripleCollection component = new SimpleMGraph();
+         fillMSG(seed, remaining, component);
+         components.add(component);
+     }
+     return components;
+ }
+
+ /**
+  * Moves the given triple, and every triple connected to it through blank
+  * nodes, from {@code tc} into {@code msg}.
+  *
+  * Two triples are connected when they share a blank node, regardless of
+  * whether that node is the subject or the object in either of them. This
+  * keeps all occurrences of a blank node in the same MSG so that it gets a
+  * single label. A work list is used instead of recursion so that no
+  * filter iterator is open while {@code tc} is modified.
+  *
+  * @param triple the triple to start from; it is always added to {@code msg}
+  * @param tc the collection triples are taken (and removed) from
+  * @param msg the collection receiving the connected triples
+  */
+ private void fillMSG(Triple triple, TripleCollection tc,
+         TripleCollection msg) {
+
+     List<Triple> pending = new Vector<Triple>();
+     pending.add(triple);
+     while (!pending.isEmpty()) {
+         Triple current = pending.remove(pending.size() - 1);
+         tc.remove(current);
+         if (msg.contains(current)) {
+             // reached a second time through another blank node
+             continue;
+         }
+         msg.add(current);
+
+         if (current.getSubject() instanceof BNode) {
+             collectTriplesWith((BNode) current.getSubject(), tc, pending);
+         }
+         if (current.getObject() instanceof BNode) {
+             collectTriplesWith((BNode) current.getObject(), tc, pending);
+         }
+     }
+ }
+
+ /**
+  * Appends to {@code pending} all triples of {@code tc} that have the given
+  * blank node as subject or as object.
+  */
+ private void collectTriplesWith(BNode node, TripleCollection tc,
+         List<Triple> pending) {
+
+     Iterator<Triple> asSubject = tc.filter(node, null, null);
+     while (asSubject.hasNext()) {
+         pending.add(asSubject.next());
+     }
+     Iterator<Triple> asObject = tc.filter(null, null, node);
+     while (asObject.hasNext()) {
+         pending.add(asObject.next());
+     }
+ }
+
+ /**
+  * Reads a serialized graph line by line and replaces its blank node
+  * labels with generated ones.
+  *
+  * Each line is first rewritten by {@code commentBlankNodeLabels}; lines
+  * for which that fails are logged and dropped. Labels are then generated
+  * and applied in rounds until no commented identifier is left, a round
+  * makes no progress, or the configured number of rounds is reached.
+  * Whatever remains is handled by
+  * {@code labelBlankNodesNonDeterministically}.
+  *
+  * @param serializedGraph reader over the serialized graph
+  * @param prefix prefix used for every generated label
+  * @return the relabeled lines
+  * @throws IOException if reading from {@code serializedGraph} fails
+  */
+ private List<String> labelBlankNodes(BufferedReader serializedGraph,
+         String prefix) throws IOException {
+
+     List<String> lines = new Vector<String>();
+     long commentedIdentifiers = 0;
+
+     for (String line = serializedGraph.readLine(); line != null;
+             line = serializedGraph.readLine()) {
+         try {
+             commentedIdentifiers = commentBlankNodeLabels(line,
+                     commentedIdentifiers, lines);
+         } catch (IOException ex) {
+             logger.error("Exception while trying to parse line: "
+                     + line + "\n{}", ex);
+         }
+     }
+     Collections.sort(lines);
+
+     Map<String, Long> labels = new HashMap<String, Long>();
+     // counters[0] = next generated symbol, counters[1] = open identifiers
+     long[] counters = {1, commentedIdentifiers};
+     int rounds = 0;
+     while (true) {
+         long openBefore = counters[1];
+         counters = generateBlankNodeLabels(lines, labels, counters[0],
+                 counters[1], prefix);
+         applyLabels(lines, labels, prefix);
+         Collections.sort(lines);
+
+         rounds++;
+         if (rounds == maxLabelingIterations) {
+             break;
+         }
+         boolean finished = counters[1] <= 0;
+         boolean stalled = openBefore == counters[1];
+         if (finished || stalled) {
+             break;
+         }
+     }
+
+     if (counters[1] > 0) {
+         labelBlankNodesNonDeterministically(lines, counters, labels, prefix);
+     }
+     return lines;
+ }
+
+
+ /**
+  * Assigns generated labels to the blank node placeholders of every line
+  * whose statement part differs from both of its neighbours.
+  *
+  * The line at index 0 is only ever used as a neighbour and never relabeled
+  * here, and a list with fewer than two lines is left untouched.
+  *
+  * @param lines the lines to process; relabeled lines are replaced in place
+  * @param labels mapping from original identifier to generated number
+  * @param genSymCounter the next number to hand out
+  * @param comments the number of identifiers still to be replaced
+  * @param prefix prefix used for every generated label
+  * @return a two element array: the updated {@code genSymCounter} followed
+  *         by the updated {@code comments}
+  */
+ private long[] generateBlankNodeLabels(List<String> lines,
+         Map<String, Long> labels, long genSymCounter, long comments,
+         String prefix) {
+
+     for (int at = 1; at < lines.size(); at++) {
+         StringBuilder before = new StringBuilder(lines.get(at - 1));
+         StringBuilder current = new StringBuilder(lines.get(at));
+         StringBuilder after = new StringBuilder();
+         if (at + 1 < lines.size()) {
+             after.append(lines.get(at + 1));
+         }
+
+         String statement = stripComments(current);
+         boolean sameAsNeighbour = stripComments(before).equals(statement)
+                 || stripComments(after).equals(statement);
+         if (sameAsNeighbour) {
+             continue;
+         }
+
+         int objectAt = checkObject(statement);
+         if (objectAt != -1) {
+             genSymCounter = applyGenSymIdentifier(labels, genSymCounter,
+                     current, objectAt, prefix);
+             comments--;
+         }
+
+         int subjectAt = checkSubject(statement);
+         if (subjectAt != -1) {
+             genSymCounter = applyGenSymIdentifier(labels, genSymCounter,
+                     current, subjectAt, prefix);
+             comments--;
+         }
+
+         lines.set(at, current.toString());
+     }
+
+     return new long[] {genSymCounter, comments};
+ }
+
+ /**
+  * Replaces placeholders whose identifier already has a generated label.
+  *
+  * For every line the object position is handled first and the subject
+  * position second; each time the identifier is taken from the last
+  * {@code #_:} comment of the line.
+  *
+  * @param lines the lines to update in place
+  * @param labels mapping from original identifier to generated number
+  * @param prefix prefix used for every generated label
+  */
+ private void applyLabels(List<String> lines, Map<String, Long> labels,
+         String prefix) {
+
+     for (int at = 0; at < lines.size(); at++) {
+         StringBuilder buffer = new StringBuilder(lines.get(at));
+
+         int objectAt = checkObject(stripComments(buffer));
+         if (objectAt != -1) {
+             substituteKnownLabel(buffer, objectAt, labels, prefix);
+         }
+
+         int subjectAt = checkSubject(stripComments(buffer));
+         if (subjectAt != -1) {
+             substituteKnownLabel(buffer, subjectAt, labels, prefix);
+         }
+
+         lines.set(at, buffer.toString());
+     }
+ }
+
+ /**
+  * If the identifier in the last comment of the line has a label, removes
+  * that comment and puts the label in place of the character at
+  * {@code position}; otherwise leaves the line unchanged.
+  */
+ private void substituteKnownLabel(StringBuilder buffer, int position,
+         Map<String, Long> labels, String prefix) {
+
+     int commentAt = buffer.lastIndexOf("#_:");
+     String identifier = buffer.substring(commentAt + 1, buffer.length());
+     if (!labels.containsKey(identifier)) {
+         return;
+     }
+     buffer.delete(commentAt, buffer.length());
+     buffer.replace(position, position + 1,
+             "_:" + prefix + labels.get(identifier));
+ }
+
+ /**
+  * Replaces every blank node label of a line by the placeholder {@code ~}
+  * and appends the original label to the end of the line as
+  * {@code " #<label>"}.
+  *
+  * Text between {@code <} and {@code >} is skipped and scanning stops at
+  * the first double quote. The resulting line is appended to
+  * {@code lines}.
+  *
+  * @param line the line to process
+  * @param commentedIdentifiers number of labels replaced so far
+  * @param lines the list receiving the rewritten line
+  * @return {@code commentedIdentifiers} increased by the number of labels
+  *         replaced in this line
+  * @throws IOException if the line ends inside a {@code <...>} section or
+  *         inside a blank node label
+  */
+ private long commentBlankNodeLabels(String line, long commentedIdentifiers,
+         List<String> lines) throws IOException {
+
+     StringReader lineReader = new StringReader(line);
+     int data = lineReader.read();
+     while (data != -1) {
+         if (data == '<') {
+             //skip until end tag
+             while ((data = lineReader.read()) != '>') {
+                 checkForEndOfStream(data);
+             }
+         } else if (data == '"') {
+             break;
+         } else if (data == '_') {
+             if ((data = lineReader.read()) == ':') {
+                 StringBuilder identifier = new StringBuilder("_:");
+                 while ((data = lineReader.read()) != ' ') {
+                     checkForEndOfStream(data);
+                     identifier.append((char) data);
+                 }
+                 // Literal substitution of the first occurrence; the
+                 // label must not be interpreted as a regular expression.
+                 int at = line.indexOf(identifier.toString());
+                 line = line.substring(0, at) + "~"
+                         + line.substring(at + identifier.length())
+                         + " #" + identifier;
+                 ++commentedIdentifiers;
+             }
+         }
+         data = lineReader.read();
+     }
+     lines.add(line);
+     return commentedIdentifiers;
+ }
+
+ /**
+  * Replaces the placeholder at {@code where} by the generated label of the
+  * identifier found in the last {@code #_:} comment of the line, creating
+  * a new label if the identifier has none yet. The comment is removed.
+  *
+  * @param labels mapping from original identifier to generated number
+  * @param genSymCounter the next number to hand out
+  * @param currentLine the line to modify in place
+  * @param where index of the placeholder character
+  * @param prefix prefix used for the generated label
+  * @return the possibly incremented {@code genSymCounter}
+  */
+ private long applyGenSymIdentifier(Map<String, Long> labels,
+         long genSymCounter, StringBuilder currentLine, int where,
+         String prefix) {
+
+     int commentAt = currentLine.lastIndexOf("#_:");
+     String identifier = currentLine.substring(commentAt + 1);
+     currentLine.delete(commentAt, currentLine.length());
+
+     Long number = labels.get(identifier);
+     if (number == null) {
+         number = genSymCounter++;
+         labels.put(identifier, number);
+     }
+     currentLine.replace(where, where + 1, "_:" + prefix + number);
+
+     return genSymCounter;
+ }
+
+ /**
+  * Labels all remaining placeholders in the order in which the lines are
+  * currently stored, then sorts the lines.
+  *
+  * @param lines the lines to update in place
+  * @param counters {@code counters[0]} is the next number to hand out,
+  *        {@code counters[1]} the number of identifiers still to be
+  *        replaced; both are updated
+  * @param labels mapping from original identifier to generated number
+  * @param prefix prefix used for every generated label
+  */
+ private void labelBlankNodesNonDeterministically(List<String> lines,
+         long[] counters, Map<String, Long> labels, String prefix) {
+
+     int at = 0;
+     while (at < lines.size()) {
+         StringBuilder buffer = new StringBuilder(lines.get(at));
+         String statement = stripComments(buffer);
+
+         int objectAt = checkObject(statement);
+         if (objectAt != -1) {
+             counters[0] = applyGenSymIdentifier(labels, counters[0],
+                     buffer, objectAt, prefix);
+             counters[1]--;
+         }
+
+         int subjectAt = checkSubject(statement);
+         if (subjectAt != -1) {
+             counters[0] = applyGenSymIdentifier(labels, counters[0],
+                     buffer, subjectAt, prefix);
+             counters[1]--;
+         }
+
+         lines.set(at, buffer.toString());
+         at++;
+     }
+     Collections.sort(lines);
+ }
+
+ /**
+  * Fails if the given read result signals the end of the stream.
+  *
+  * @param data the value returned by a {@code read()} call
+  * @throws IOException if {@code data} is {@code -1}
+  */
+ private void checkForEndOfStream(int data) throws IOException {
+     boolean endReached = (data == -1);
+     if (!endReached) {
+         return;
+     }
+     throw new IOException("Parsing Error!");
+ }
+
+ /**
+  * Tells whether the third character from the end of the given line is
+  * the placeholder {@code ~}.
+  *
+  * @param line the line to inspect, without trailing comments
+  * @return the index of the placeholder, or {@code -1} if that position
+  *         holds another character or the line is shorter than three
+  *         characters
+  */
+ private int checkObject(String line) {
+     int index = line.length() - 3;
+     // Short lines (e.g. the empty string) have no such position; without
+     // this guard charAt would throw StringIndexOutOfBoundsException.
+     if (index >= 0 && line.charAt(index) == '~') {
+         return index;
+     }
+     return -1;
+ }
+
+ /**
+  * Tells whether the given line starts with the placeholder {@code ~}.
+  *
+  * @param line the line to inspect
+  * @return {@code 0} if the first character is the placeholder, otherwise
+  *         {@code -1} (also for the empty string)
+  */
+ private int checkSubject(String line) {
+     // The length check keeps charAt(0) from throwing on an empty line.
+     if (line.length() > 0 && line.charAt(0) == '~') {
+         return 0;
+     }
+     return -1;
+ }
+
+
+ /**
+  * Returns the part of the line up to and including the last
+  * {@code " ."}.
+  *
+  * @param line the line to cut
+  * @return the leading part of the line, or the empty string if the line
+  *         has fewer than three characters
+  */
+ private String stripComments(StringBuilder line) {
+     if (line.length() >= 3) {
+         int end = line.lastIndexOf(" .") + 2;
+         return line.substring(0, end);
+     }
+     return "";
+ }
+
+ /**
+  * Computes a hexadecimal digest over the non blank node parts of the
+  * given triples.
+  *
+  * For every triple a key is built from the hash code of its subject
+  * (unless it is a blank node), the hash code of its predicate and, for
+  * the object, the lexical form of a literal or the hash code of any
+  * other non blank node. The keys are sorted before being digested so
+  * that the result does not depend on the iteration order of the
+  * collection.
+  *
+  * @param tc the triples to digest
+  * @param algorithm the name of the digest algorithm, e.g. {@code "MD5"}
+  * @return the digest as lower case hexadecimal string, two digits per byte
+  * @throws NoSuchAlgorithmException if the algorithm is not available
+  */
+ private String getMDHexString(TripleCollection tc, String algorithm)
+         throws NoSuchAlgorithmException {
+
+     MessageDigest md = MessageDigest.getInstance(algorithm);
+
+     List<String> keys = new Vector<String>();
+     for (Triple t : tc) {
+         StringBuilder key = new StringBuilder();
+         if (!(t.getSubject() instanceof BNode)) {
+             key.append(t.getSubject().hashCode());
+         }
+         key.append(t.getPredicate().hashCode());
+         Object object = t.getObject();
+         if (!(object instanceof BNode)) {
+             if (object instanceof Literal) {
+                 key.append(((Literal) object).getLexicalForm());
+             } else {
+                 key.append(object.hashCode());
+             }
+         }
+         keys.add(key.toString());
+     }
+     Collections.sort(keys);
+
+     StringBuilder input = new StringBuilder();
+     for (String key : keys) {
+         input.append(key);
+     }
+
+     // Fixed charset: the digest must not vary with the platform default.
+     md.update(input.toString().getBytes(
+             java.nio.charset.Charset.forName("UTF-8")));
+     byte[] hash = md.digest();
+
+     StringBuilder hexString = new StringBuilder();
+     for (int i = 0; i < hash.length; i++) {
+         String hex = Integer.toHexString(0xFF & hash[i]);
+         if (hex.length() == 1) {
+             hexString.append('0');
+         }
+         hexString.append(hex);
+     }
+
+     return hexString.toString();
+ }
+}
\ No newline at end of file
Added:
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider
URL:
http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider?rev=918082&view=auto
==============================================================================
---
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider
(added)
+++
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider
Tue Mar 2 15:46:53 2010
@@ -0,0 +1 @@
+org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider
\ No newline at end of file
Added:
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java?rev=918082&view=auto
==============================================================================
---
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java
(added)
+++
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/RandomGraph.java
Tue Mar 2 15:46:53 2010
@@ -0,0 +1,178 @@
+/*
+ * Copyright 2010 mir.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * under the License.
+ */
+package org.apache.clerezza.rdf.stable.serializer;
+
+import java.util.Iterator;
+import java.util.UUID;
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.commons.lang.RandomStringUtils;
+
+/**
+ *
+ * @author mir
+ */
+class RandomGraph extends SimpleMGraph {
+
+ public int growCount = 0;
+ public int removeCount =0;
+ /**
+  * Changes the graph by one triple: either removes a triple obtained from
+  * {@code getRandomTriple()} or adds a newly created one, and updates
+  * {@code removeCount} or {@code growCount} accordingly.
+  *
+  * @return the triple that was removed or added
+  */
+ public Triple evolve() {
+     boolean shrink = rollDice(3) == 0 && size() != 0;
+     if (!shrink) {
+         Triple added = createRandomTriple();
+         add(added);
+         growCount++;
+         return added;
+     }
+     Triple removed = getRandomTriple();
+     remove(removed);
+     removeCount++;
+     return removed;
+ }
+
+ /**
+  * Builds a triple from a subject, predicate and object obtained, in that
+  * order, from {@code getSubject()}, {@code getPredicate()} and
+  * {@code getObject()}.
+  */
+ private Triple createRandomTriple() {
+     NonLiteral subject = getSubject();
+     UriRef predicate = getPredicate();
+     Resource object = getObject();
+     return new TripleImpl(subject, predicate, object);
+ }
+
+ /**
+  * Returns a subject for a new triple: with equal chance a newly created
+  * non-literal or one taken from the graph. An empty graph always yields a
+  * new one.
+  */
+ private NonLiteral getSubject() {
+     int choice = rollDice(2);
+     if (size() == 0) {
+         choice = 0;
+     }
+
+     if (choice == 0) {
+         // create new NonLiteral: retry until the resource qualifies
+         Resource created = createRandomResource();
+         while (!(created instanceof NonLiteral)) {
+             created = createRandomResource();
+         }
+         return (NonLiteral) created;
+     }
+     if (choice == 1) {
+         // get existing NonLiteral: retry until the resource qualifies
+         Resource existing = getExistingResource();
+         while (!(existing instanceof NonLiteral)) {
+             existing = getExistingResource();
+         }
+         return (NonLiteral) existing;
+     }
+     throw new RuntimeException("in getSubject()");
+ }
+
+ /**
+  * Returns a predicate for a new triple: with equal chance a newly created
+  * URI reference or one taken from the graph. An empty graph always yields
+  * a new one.
+  */
+ private UriRef getPredicate() {
+     int choice = rollDice(2);
+     if (size() == 0) {
+         choice = 0;
+     }
+
+     if (choice == 0) {
+         // create new UriRef
+         return createRandomUriRef();
+     }
+     if (choice == 1) {
+         // get existing UriRef: retry until the resource qualifies
+         Resource existing = getExistingResource();
+         while (!(existing instanceof UriRef)) {
+             existing = getExistingResource();
+         }
+         return (UriRef) existing;
+     }
+     throw new RuntimeException("in getPredicate()");
+ }
+
+ /**
+  * Returns an object for a new triple: with equal chance a newly created
+  * resource or whatever {@code getExistingResource()} returns. An empty
+  * graph always yields a new one.
+  */
+ private Resource getObject() {
+     int choice = rollDice(2);
+     if (size() == 0) {
+         choice = 0;
+     }
+
+     if (choice == 0) {
+         // create new resource
+         return createRandomResource();
+     }
+     if (choice == 1) {
+         // get existing resource
+         return getExistingResource();
+     }
+     throw new RuntimeException("in getObject()");
+ }
+
+ /**
+  * Returns {@code Math.random() * faces} truncated to an int.
+  *
+  * @param faces the factor applied to the random value
+  */
+ private static int rollDice(int faces) {
+     double scaled = Math.random() * faces;
+     return (int) scaled;
+ }
+
+ /**
+  * Creates, chosen by a three-sided dice roll, a new blank node, a random
+  * URI reference, or a plain literal holding a random string of 1 to 100
+  * characters.
+  */
+ private Resource createRandomResource() {
+     int kind = rollDice(3);
+     if (kind == 0) {
+         return new BNode();
+     }
+     if (kind == 1) {
+         return createRandomUriRef();
+     }
+     if (kind == 2) {
+         int length = rollDice(100) + 1;
+         return new PlainLiteralImpl(RandomStringUtils.random(length));
+     }
+     throw new RuntimeException("in createRandomResource()");
+ }
+
+ /**
+  * Returns the subject, predicate or object (chosen by a three-sided dice
+  * roll) of a triple obtained from {@code getRandomTriple()}.
+  *
+  * @return the chosen resource, or {@code null} if no triple is available
+  */
+ private Resource getExistingResource() {
+     Triple source = getRandomTriple();
+     if (source == null) {
+         return null;
+     }
+     int part = rollDice(3);
+     if (part == 0) {
+         return source.getSubject();
+     }
+     if (part == 1) {
+         return source.getPredicate();
+     }
+     if (part == 2) {
+         return source.getObject();
+     }
+     return null;
+ }
+
+ /**
+  * Creates a URI reference of the form {@code http://<random UUID>}.
+  */
+ private UriRef createRandomUriRef() {
+     String uri = "http://" + UUID.randomUUID();
+     return new UriRef(uri);
+ }
+
+ /**
+  * Picks a triple of this graph at random.
+  *
+  * A position between 0 and {@code size() - 1} is rolled and the iterator
+  * is advanced to it. The previous implementation rolled dice in a loop
+  * but never advanced the iterator, so it always returned the first
+  * triple of the iteration.
+  *
+  * @return a triple of this graph, or {@code null} if the graph is empty
+  */
+ private Triple getRandomTriple() {
+     int size = this.size();
+     if (size == 0) {
+         return null;
+     }
+     Iterator<Triple> iter = iterator();
+     for (int skip = rollDice(size); skip > 0; skip--) {
+         iter.next();
+     }
+     return iter.next();
+ }
+}
Added:
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java?rev=918082&view=auto
==============================================================================
---
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java
(added)
+++
incubator/clerezza/issues/CLEREZZA-20/org.apache.clerezza.rdf.stable.serializer/src/test/java/org/apache/clerezza/rdf/stable/serializer/StableSerializerProviderTest.java
Tue Mar 2 15:46:53 2010
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.clerezza.rdf.stable.serializer;
+
+import java.io.ByteArrayOutputStream;
+import junit.framework.Assert;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;
+import org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider;
+import org.apache.commons.lang.StringUtils;
+import org.junit.Test;
+
+
+/**
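+ * Compares how much the output of the stable serializer and of the Jena
+ * serializer changes between successive states of an evolving random graph.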
+ *
+ * @author mir
+ */
+public class StableSerializerProviderTest {
+
+ /**
+  * Evolves a random graph in 50 cycles of three changes each, serializes
+  * it after every cycle with both serializers as N-Triples, and sums the
+  * Levenshtein distances between consecutive outputs. The stable
+  * serializer is expected to accumulate the smaller total.
+  *
+  * The totals are compared directly; comparing integer-divided averages
+  * would discard the remainder and could reject a strictly smaller total.
+  */
+ @Test
+ public void testTurtleSerializer() {
+     StableSerializerProvider ssp = new StableSerializerProvider();
+     JenaSerializerProvider jsp = new JenaSerializerProvider();
+     ssp.serializer = jsp;
+     int cycles = 50;
+     int totalJenaDistance = 0;
+     int totalStableDistance = 0;
+     RandomGraph rGraph = new RandomGraph();
+     String newJenaResult = "";
+     String newStableResult = "";
+     for (int i = 0; i < cycles; i++) {
+         for (int j = 0; j < 3; j++) {
+             rGraph.evolve();
+         }
+
+         String oldJenaResult = newJenaResult;
+         newJenaResult = serializeWithJena(jsp, rGraph);
+
+         String oldStableResult = newStableResult;
+         newStableResult = serializeWithStable(ssp, rGraph);
+
+         totalJenaDistance += StringUtils.getLevenshteinDistance(
+                 oldJenaResult, newJenaResult);
+         totalStableDistance += StringUtils.getLevenshteinDistance(
+                 oldStableResult, newStableResult);
+     }
+     Assert.assertTrue("stable distance " + totalStableDistance
+             + " should be below jena distance " + totalJenaDistance,
+             totalStableDistance < totalJenaDistance);
+ }
+
+ /**
+  * Serializes the graph as N-Triples with the given Jena serializer and
+  * returns the bytes written as a string.
+  */
+ private String serializeWithJena(JenaSerializerProvider jsp,
+         RandomGraph rGraph) {
+     ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+     jsp.serialize(buffer, rGraph, SupportedFormat.N_TRIPLE);
+     return new String(buffer.toByteArray());
+ }
+
+ /**
+  * Serializes the graph as N-Triples with the given stable serializer and
+  * returns the bytes written as a string.
+  */
+ private String serializeWithStable(StableSerializerProvider ssp,
+         RandomGraph rGraph) {
+     ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+     ssp.serialize(buffer, rGraph, SupportedFormat.N_TRIPLE);
+     return new String(buffer.toByteArray());
+ }
+
+}