Author: reto
Date: Sun May 15 00:56:01 2011
New Revision: 1103262
URL: http://svn.apache.org/viewvc?rev=1103262&view=rev
Log:
CLEREZZA-525: Created new Bundle providing WeightedTcProvider implementing
proxy making any graph on the web (or its cached copy) accessible via TcManager
WebProxy.scala:134: follows naming scheme being discussed, might be changed
Added:
incubator/clerezza/trunk/parent/rdf.storage.web/ (with props)
incubator/clerezza/trunk/parent/rdf.storage.web/pom.xml
incubator/clerezza/trunk/parent/rdf.storage.web/src/
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/resources/
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/resources/OSGI-INF/
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/resources/OSGI-INF/serviceComponents.xml
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/scala/
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/scala/WebProxy.scala
Propchange: incubator/clerezza/trunk/parent/rdf.storage.web/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sun May 15 00:56:01 2011
@@ -0,0 +1 @@
+target
Added: incubator/clerezza/trunk/parent/rdf.storage.web/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.storage.web/pom.xml?rev=1103262&view=auto
==============================================================================
--- incubator/clerezza/trunk/parent/rdf.storage.web/pom.xml (added)
+++ incubator/clerezza/trunk/parent/rdf.storage.web/pom.xml Sun May 15 00:56:01
2011
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><project
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>parent</artifactId>
+ <version>0.2-incubating-SNAPSHOT</version>
+ </parent>
+ <artifactId>rdf.storage.web</artifactId>
+ <version>0.1-incubating-SNAPSHOT</version>
+ <packaging>bundle</packaging>
+ <name>Clerezza - Caching Web Storage Provider</name>
+ <description>A (currently read-only) storage provider retrieving graphs
from the web. It
+ implements a caching proxy, relying on another provider for storing
triples.</description>
+ <dependencies>
+ <dependency>
+ <groupId>org.osgi</groupId>
+ <artifactId>org.osgi.compendium</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.osgi</groupId>
+ <artifactId>org.osgi.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>platform.typerendering.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>platform.typerendering.scala</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.ontologies</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.utils</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.scala.utils</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>osgi.services</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>javax.ws.rs</groupId>
+ <artifactId>jsr311-api</artifactId>
+ </dependency>
+ </dependencies>
+ <build>
+ <sourceDirectory>src/main/scala</sourceDirectory>
+ <testSourceDirectory>src/test/scala</testSourceDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <configuration>
+ <instructions>
+
<Service-Component>OSGI-INF/serviceComponents.xml</Service-Component>
+
<Export-Package>${project.groupId}.${project.artifactId}</Export-Package>
+
<Bundle-SymbolicName>${project.groupId}.${project.artifactId}</Bundle-SymbolicName>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-eclipse-plugin</artifactId>
+ <configuration>
+ <downloadSources>true</downloadSources>
+
<downloadJavadocs>true</downloadJavadocs>
+ <buildcommands>
+
<buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
+ </buildcommands>
+ <additionalProjectnatures>
+
<projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
+ </additionalProjectnatures>
+ <classpathContainers>
+
<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER
+ </classpathContainer>
+
<classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER
+ </classpathContainer>
+ </classpathContainers>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.scala-tools</groupId>
+ <artifactId>maven-scala-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+</project>
+
+
Added:
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/resources/OSGI-INF/serviceComponents.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.storage.web/src/main/resources/OSGI-INF/serviceComponents.xml?rev=1103262&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/resources/OSGI-INF/serviceComponents.xml
(added)
+++
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/resources/OSGI-INF/serviceComponents.xml
Sun May 15 00:56:01 2011
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<components xmlns:scr="http://www.osgi.org/xmlns/scr/v1.0.0">
+
+ <scr:component enabled="true"
name="org.apache.clerezza.rdf.storage.web.WebProxy">
+ <implementation
class="org.apache.clerezza.rdf.storage.web.WebProxy"/>
+ <service servicefactory="false">
+ <provide
interface="org.apache.clerezza.rdf.storage.web.WebProxy"/>
+ <provide
interface="org.apache.clerezza.rdf.core.access.WeightedTcProvider"/>
+ </service>
+ <!-- hard coded <property name="weight" type="Integer" value="0"/> -->
+ <reference name="weightedTcProvider"
interface="org.apache.clerezza.rdf.core.access.WeightedTcProvider"
+ cardinality="1..n" policy="dynamic"
bind="bindWeightedTcProvider" unbind="unbindWeightedTcProvider"/>
+ <reference name="parser"
interface="org.apache.clerezza.rdf.core.serializedform.Parser"
+ cardinality="1..1" policy="static" bind="bindParser"
unbind="unbindParser"/>
+ </scr:component>
+
+</components>
Added:
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/scala/WebProxy.scala
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.storage.web/src/main/scala/WebProxy.scala?rev=1103262&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/scala/WebProxy.scala
(added)
+++
incubator/clerezza/trunk/parent/rdf.storage.web/src/main/scala/WebProxy.scala
Sun May 15 00:56:01 2011
@@ -0,0 +1,255 @@
+package org.apache.clerezza.rdf.storage.web
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.clerezza.platform.Constants
+import org.apache.clerezza.rdf.utils.GraphNode
+import org.osgi.service.component.ComponentContext
+import org.apache.clerezza.platform.config.PlatformConfig
+import java.net.{HttpURLConnection, URL}
+import org.apache.clerezza.rdf.core.serializedform.SupportedFormat
+import org.apache.clerezza.rdf.core.serializedform.Parser
+import org.apache.clerezza.platform.typerendering.WebRenderingService
+import org.apache.clerezza.rdf.core.access.security.TcPermission
+import java.io.File
+import java.security.{PrivilegedExceptionAction, PrivilegedActionException,
AccessController}
+
+import org.slf4j.scala._
+import org.apache.clerezza.rdf.core.access._
+import org.apache.clerezza.rdf.core.impl.AbstractMGraph
+import org.apache.clerezza.rdf.core._
+
+/**
+ * The Web Proxy Service enables applications to request remote (and local) graphs.
+ * It keeps cached versions of the remote graphs in store for faster delivery.
+ *
+ */
+class WebProxy extends WeightedTcProvider with Logging {
+
+ /** Multiplexer over the providers bound via bindWeightedTcProvider; the cached graphs are stored in and read from it. */
+ private val tcProvider: TcProviderMultiplexer = new TcProviderMultiplexer
+
+ /**
+  * Adds a provider to the internal multiplexer (bind method of the
+  * "weightedTcProvider" reference).
+  *
+  * @param provider the provider that became available
+  */
+ protected def bindWeightedTcProvider(provider: WeightedTcProvider): Unit =
+   tcProvider.addWeightedTcProvider(provider)
+
+ /**
+  * Removes a provider from the internal multiplexer (unbind method of the
+  * "weightedTcProvider" reference).
+  *
+  * @param provider the provider that is going away
+  */
+ protected def unbindWeightedTcProvider(provider: WeightedTcProvider): Unit =
+   tcProvider.removeWeightedTcProvider(provider)
+
+ //todo: replace this with an appropriate graph; Constants.CONFIG_GRAPH_URI is only a placeholder and this value is not read anywhere else in this class
+ protected val authoritativeLocalGraphs = Constants.CONFIG_GRAPH_URI
+
+
+ /**
+  * OSGi method, called on activation. Nothing needs to be set up at the moment.
+  *
+  * @param context the component context handed in on activation (unused)
+  */
+ protected def activate(context: ComponentContext): Unit = ()
+
+
+ /** The parser used to read fetched graphs; null until bindParser is called and again after unbindParser. */
+ private var parser: Parser = _
+
+ /**
+  * Stores the parser to use for fetched graphs (bind method of the "parser" reference).
+  *
+  * @param p the parser that became available
+  */
+ protected def bindParser(p: Parser): Unit = {
+   parser = p
+ }
+
+ /**
+  * Forgets the current parser (unbind method of the "parser" reference).
+  *
+  * @param p the parser that is going away; it is not compared with the stored one
+  */
+ protected def unbindParser(p: Parser): Unit = {
+   parser = null
+ }
+
+ /** The weight of this provider, hard-coded to 0. */
+ def getWeight: Int = 0
+
+ /**
+  * Returns an MGraph view of the graph obtained through getGraph(name).
+  * Filtering and size are delegated to that graph; no separate mutable copy is created.
+  * NOTE(review): presumably modifications through the returned MGraph are not
+  * supported -- confirm against AbstractMGraph.
+  *
+  * @param name the name of the graph
+  * @return an MGraph whose content is that of getGraph(name)
+  */
+ def getMGraph(name: UriRef): MGraph = {
+   val fetched = getGraph(name)
+   new AbstractMGraph() {
+     protected def performFilter(subject: NonLiteral, predicate: UriRef,
+         `object`: Resource): java.util.Iterator[Triple] =
+       fetched.filter(subject, predicate, `object`)
+
+     def size = fetched.size
+   }
+ }
+
+ /**
+  * Returns the graph with the given name using the Cache.Fetch policy.
+  *
+  * @param name the name of the graph
+  */
+ def getGraph(name: UriRef): Graph = getGraph(name, Cache.Fetch)
+
+ /**
+  * Returns the triples with the given name; this is whatever getMGraph(name) returns.
+  *
+  * @param name the name of the triple collection
+  */
+ def getTriples(name: UriRef): TripleCollection = getMGraph(name)
+
+ /** Not supported by this provider: always throws UnsupportedOperationException. */
+ def createMGraph(name: UriRef): MGraph =
+   throw new UnsupportedOperationException
+
+ /** Not supported by this provider: always throws UnsupportedOperationException. */
+ def createGraph(name: UriRef, triples: TripleCollection): Graph =
+   throw new UnsupportedOperationException
+
+ /** Not supported by this provider: always throws UnsupportedOperationException. */
+ def deleteTripleCollection(name: UriRef): Unit =
+   throw new UnsupportedOperationException
+
+ /**
+  * Collects the names, among those returned by listGraphs, whose graph equals the given one.
+  *
+  * @param graph the graph to look up
+  * @return a new set with the matching names (empty if there is none)
+  */
+ def getNames(graph: Graph): java.util.Set[UriRef] = {
+   val matching = new java.util.HashSet[UriRef]
+   val candidates = listGraphs.iterator
+   while (candidates.hasNext) {
+     val candidate = candidates.next()
+     if (getGraph(candidate).equals(graph)) {
+       matching.add(candidate)
+     }
+   }
+   matching
+ }
+
+ /** Returns a new set holding the union of listGraphs and listMGraphs. */
+ def listTripleCollections: java.util.Set[UriRef] = {
+   val all = new java.util.HashSet[UriRef](listGraphs)
+   all.addAll(listMGraphs)
+   all
+ }
+
+ /** Always returns the empty set. */
+ //or should we list graphs for which we have a cached version?
+ def listGraphs: java.util.Set[UriRef] = java.util.Collections.emptySet[UriRef]
+
+ /** Always returns the empty set. */
+ def listMGraphs: java.util.Set[UriRef] = java.util.Collections.emptySet[UriRef]
+
+ /**
+  * Returns the graph with the given name, taken from the local cache and/or fetched
+  * from the web depending on the update policy.
+  *
+  * The cached copy lives in the underlying providers under the name
+  * "urn:x-localinstance:/cache/" followed by the unicode string of `name`.
+  * The whole lookup runs inside AccessController.doPrivileged.
+  *
+  * @param name the name of the graph; it is opened as a URL whenever a fetch is needed
+  * @param updatePolicy Cache.Fetch returns the cached copy and fetches only when the
+  *        cache lookup fails with NoSuchEntityException (the age of the cached copy is
+  *        not checked); Cache.ForceUpdate always fetches and replaces the cached copy;
+  *        Cache.CacheOnly only consults the cache, a failed lookup is not caught here
+  * @return the cached graph after the policy has been applied
+  * @throws RuntimeException wrapping any checked exception raised during the
+  *         privileged action (for instance an I/O failure while fetching)
+  */
+ def getGraph(name: UriRef, updatePolicy: Cache.Value): Graph = {
+   //a routine lookup is not a warning, and nothing is written to stdout any more
+   logger.debug("getting " + name)
+   val cacheGraphName = new UriRef("urn:x-localinstance:/cache/" + name.getUnicodeString)
+   //todo: follow redirects and keep track of them
+   //todo: keep track of headers especially date and etag. test for etag similarity
+   //todo: for https connection allow user to specify his webid and send his key: ie allow web server to be an agent
+   //todo: add GRDDL functionality, so that other return types can be processed too
+   //todo: enable ftp and other formats (though content negotiation won't work there)
+
+   //fetches the remote representation and replaces the cached graph with it
+   def updateGraph(): Unit = {
+     val connection = new URL(name.getUnicodeString).openConnection()
+     connection match {
+       case hc: HttpURLConnection => hc.addRequestProperty("Accept", acceptHeader)
+       //other connection types have no Accept header; without this case
+       //any non-HTTP URL ended in a MatchError
+       case _ =>
+     }
+     connection.connect()
+     val in = connection.getInputStream()
+     //close the stream even if parsing fails
+     //NOTE(review): assumes parser.parse has fully read the stream when it returns -- confirm
+     val remoteTriples = try {
+       parser.parse(in, connection.getContentType(), name)
+     } finally {
+       in.close()
+     }
+     //delete and re-create under one lock so that concurrent updates through
+     //this proxy do not interleave
+     tcProvider.synchronized {
+       try {
+         tcProvider.deleteTripleCollection(cacheGraphName)
+       } catch {
+         //no cached copy yet, so there is nothing to replace
+         case e: NoSuchEntityException =>
+       }
+       tcProvider.createGraph(cacheGraphName, remoteTriples)
+     }
+   }
+
+   try {
+     AccessController.doPrivileged(new PrivilegedExceptionAction[Graph] {
+       def run: Graph = {
+         //the logic here is not quite right, as we don't look at time of previous fetch.
+         updatePolicy match {
+           case Cache.Fetch =>
+             try {
+               tcProvider.getGraph(cacheGraphName)
+             } catch {
+               case e: NoSuchEntityException =>
+                 updateGraph()
+                 tcProvider.getGraph(cacheGraphName)
+             }
+           case Cache.ForceUpdate =>
+             updateGraph()
+             tcProvider.getGraph(cacheGraphName)
+           case Cache.CacheOnly =>
+             tcProvider.getGraph(cacheGraphName)
+         }
+       }
+     })
+   } catch {
+     case ex: PrivilegedActionException =>
+       //unwrap: rethrow unchecked causes as they are, wrap everything else
+       ex.getCause match {
+         case cause: UnsupportedOperationException => throw cause
+         case cause: EntityAlreadyExistsException => throw cause
+         case cause: RuntimeException => throw cause
+         case cause => throw new RuntimeException(cause)
+       }
+   }
+ }
+
+
+ /**
+  * The value of the Accept header sent when fetching over HTTP: every format the
+  * bound parser supports, each with a quality value, followed by a low-priority
+  * catch-all range. Computed on first use from the parser bound at that time.
+  */
+ private lazy val acceptHeader: String = {
+
+   import scala.collection.JavaConversions._
+
+   val weightedFormats = for (f <- parser.getSupportedFormats) yield {
+     val qualityOfFormat = f match {
+       //the default, well established format
+       case SupportedFormat.RDF_XML => "1.0"
+       //we prefer most dedicated formats to (X)HTML, not because those are "better",
+       //but just because it is quite likely that the pure RDF format will be
+       //lighter (contain less presentation markup), and it is also possible that HTML does not
+       //contain any RDFa, but just points to another format.
+       case SupportedFormat.XHTML => "0.5"
+       //we prefer XHTML over html, because parsing (should) be easier
+       case SupportedFormat.HTML => "0.4"
+       //all other formats known currently are structured formats
+       case _ => "0.8"
+     }
+     f + "; q=" + qualityOfFormat
+   }
+   //the catch-all media range is "*/*" and a quality value has to start with a digit;
+   //the former " *; q=.1" was not a valid Accept header element.
+   //Joining a list also keeps the header well-formed when no format is supported.
+   (weightedFormats.toList ::: List("*/*; q=0.1")).mkString(", ") //is that for GRDDL?
+ }
+}
+
+object Cache extends Enumeration {
+ /**return the cached version, fetching it first if nothing is cached. NOTE(review): WebProxy.getGraph does not yet check whether the cached version is out of date */
+ val Fetch = Value
+ /**fetch from source whatever is in cache, replacing the cached version */
+ val ForceUpdate = Value
+ /**only get cached version. NOTE(review): WebProxy.getGraph does not return an empty graph when none exists, the failed cache lookup (presumably NoSuchEntityException) propagates -- reconcile */
+ val CacheOnly = Value
+}