Author: rwesten
Date: Tue Mar 20 13:54:23 2012
New Revision: 1302897

URL: http://svn.apache.org/viewvc?rev=1302897&view=rev
Log:
initial commit of an eHealth domain demo. See README.md for details

Added:
    incubator/stanbol/trunk/demos/ehealth/   (with props)
    incubator/stanbol/trunk/demos/ehealth/README.md
    incubator/stanbol/trunk/demos/ehealth/index.sh   (with props)
    incubator/stanbol/trunk/demos/ehealth/pom.xml   (with props)
    incubator/stanbol/trunk/demos/ehealth/src/
    incubator/stanbol/trunk/demos/ehealth/src/main/
    incubator/stanbol/trunk/demos/ehealth/src/main/indexing/
    incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/
    incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/
    incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/mapping-FoldToASCII.txt
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/mapping-ISOLatin1Accent.txt
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/protwords.txt
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/schema.xml
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/scripts.conf
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/solrconfig.xml
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/spellings.txt
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/stopwords.txt
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/stopwords_de.txt
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/synonyms.txt
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/synonyms_de.txt
   (with props)
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/fieldboosts.properties
    
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/indexing.properties
    incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/mappings.txt
    incubator/stanbol/trunk/demos/ehealth/src/main/resources/
    incubator/stanbol/trunk/demos/ehealth/src/main/resources/config/
    
incubator/stanbol/trunk/demos/ehealth/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-ehealth.config
    
incubator/stanbol/trunk/demos/ehealth/src/main/resources/config/org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine-drugid.config
    
incubator/stanbol/trunk/demos/ehealth/src/main/resources/config/org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine-ehealth.config

Propchange: incubator/stanbol/trunk/demos/ehealth/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Mar 20 13:54:23 2012
@@ -0,0 +1 @@
+target

Added: incubator/stanbol/trunk/demos/ehealth/README.md
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/README.md?rev=1302897&view=auto
==============================================================================
--- incubator/stanbol/trunk/demos/ehealth/README.md (added)
+++ incubator/stanbol/trunk/demos/ehealth/README.md Tue Mar 20 13:54:23 2012
@@ -0,0 +1,71 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+
+Apache Stanbol Demo: eHealth
+============================
+
+This module provides a demo on how to customize Apache Stanbol to the ehealth 
domain.
+
+This demo uses the following datasets:
+
+* __[Dailymed](http://dailymed.nlm.nih.gov/dailymed/)__([RDF 
version](http://www4.wiwiss.fu-berlin.de/dailymed/)): Published by the 
National Library of Medicine, this dataset provides high quality information 
about marketed drugs.
+* __[SIDER](http://sideeffects.embl.de/)__([RDF 
version](http://www4.wiwiss.fu-berlin.de/sider)): SIDER contains information on 
marketed drugs and their adverse effects. The information is extracted from 
public documents and package inserts.
+* 
__[Diseasome](http://www.nd.edu/~networks/Publication%20Categories/03%20Journal%20Articles/Biology/HumanDisease_PNAS-V104-p8685(14My07).pdf)__([RDF
 version](http://www4.wiwiss.fu-berlin.de/diseasome)): The human disease 
network publishes a network of 4,300 disorders and disease genes linked by 
known disorder-gene associations for exploring all known phenotype and disease 
gene associations, indicating the common genetic origin of many diseases.
+* __[DrugBank](http://www.drugbank.ca/)__([RDF 
version](http://www4.wiwiss.fu-berlin.de/drugbank)): A repository of almost 
5000 FDA-approved small molecule and biotech drugs. It contains detailed 
information about drugs including chemical, pharmacological and pharmaceutical 
data; along with comprehensive drug target data such as sequence, structure, 
and pathway information.
+
+Note that the RDF versions of these datasets used by this demo are hosted by 
the [Freie Universität 
Berlin](http://www.wiwiss.fu-berlin.de/en/institute/pwo/bizer/).
+
+This demo shows how to
+
+1. import the listed datasets to the Apache Stanbol Entityhub by using the 
indexing utilities provided by the Stanbol Entityhub. This includes usage 
examples for 
+    * schema mappings during indexing (see 
"src/main/indexing/config/mappings.txt")
+    * customized Apache Solr schemas to control how fields are indexed (see 
solr core configuration at "src/main/indexing/config/ehealth")
+2. "install" the indexed datasets - make them available via the Apache Stanbol 
Entityhub
+3. configure the Stanbol Enhancer to extract ehealth related entities
+    * based on their labels (fields mapped to rdfs:label)
+    * Drugs based on their various IDs (fields mapped to skos:notation)
+
+## Usage
+
+To install the demo you will need to do the following steps
+
+1. execute "__mvn install__": This will create the bundle 
"org.apache.stanbol.demo.ehealth-*.jar" in the "./target" folder. This bundle 
includes the configuration for Apache Stanbol Components:
+    * Two configurations for the 
[KeywordLinkingEngine](http://incubator.apache.org/stanbol/docs/trunk/enhancer/engines/keywordlinkingengine.html).
 One that is configured to extract Entities of the above datasets based on 
their labels and another one that is configured to extract Drugs based on 
their IDs.
+    * A special 
[EnhancementChain](http://incubator.apache.org/stanbol/docs/trunk/enhancer/chains/)
 for processing ehealth data.
+2. execute "__./index.sh__": This shell script automates the steps described 
in detail by the [Working with local Entities 
Guide](http://incubator.apache.org/stanbol/docs/trunk/customvocabulary.html). 
This includes the following steps. 
+    * assemble the generic RDF indexing tool 
({stanbol}/entityhub/indexing/genericrdf)
+    * copy the configuration from "./src/main/indexing/config" to the target 
directory used for indexing
+    * initialize missing configs by calling "java -jar 
org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar 
init"
+    * download the datasets listed above to 
"./target/indexing/indexing/resources/rdfdata/"
+    * index the datasets
+    * copy the results "org.apache.stanbol.data.site.ehealth-1.0.0.jar" and 
"ehealth.solrindex.zip" to the "./target" folder
+3. __Install__ the data to a running Stanbol instance: Both the Stable and the 
Full launcher can be used as base for this demo. The following steps are 
required to install the demo
+    1. copy "./target/ehealth.solrindex.zip" to 
"{stanbol-workingdir}/sling/datafiles"
+    2. install both bundles 
"./target/org.apache.stanbol.data.site.ehealth-1.0.0.jar" and 
"./target/org.apache.stanbol.demo.ehealth-*.jar" to your Stanbol instance. 
Users can use the [Apache Felix Web 
Console](http://localhost:8080/system/console/bundles)(url: 
http://{host}:{port}/system/console/bundles) for this task.
+    3. wait a minute until Stanbol has installed the data from the 
"ehealth.solrindex.zip" file
+
+
+After that you will be able to 
+
+* use the datasets with the [Stanbol 
Entityhub](http://localhost:8080/entityhub/site/ehealth/) (url: 
http://{host}:{port}/{alias}/entityhub/site/ehealth)
+* extract ehealth related terms by using the [Stanbol 
Enhancer](http://localhost:8080/enhancer/chain/ehealth) (url: 
http://{host}:{port}/{alias}/enhancer/chain/ehealth)
+
+
+## Background information about this demo
+
+TODO!!
\ No newline at end of file

Added: incubator/stanbol/trunk/demos/ehealth/index.sh
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/index.sh?rev=1302897&view=auto
==============================================================================
--- incubator/stanbol/trunk/demos/ehealth/index.sh (added)
+++ incubator/stanbol/trunk/demos/ehealth/index.sh Tue Mar 20 13:54:23 2012
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Builds the indexing tool, downloads the RDF dumps and indexes them.
+# All paths are relative: start this script from the demos/ehealth directory.
+
+# 1. build the indexing tool and copy it to the target/indexing directory
+
+# target/indexing is a directory, so it needs to be tested with -d (not -f)
+if [ ! -d target/indexing ]
+then
+    mkdir -p target/indexing
+fi
+
+if [ ! -f target/indexing/org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar ]
+then
+    echo "Prepairing Indexing Tool"
+    # stop if a directory is missing rather than building/copying in the wrong place
+    cd ../../entityhub/indexing/genericrdf/ || exit 1
+    if [ ! -f target/org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar ]
+    then
+        mvn assembly:single
+    fi
+    cd ../../../demos/ehealth/ || exit 1
+    cp ../../entityhub/indexing/genericrdf/target/org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar target/indexing/
+else
+    echo "Indexing Tool present ... skip assembling a new version"
+fi
+
+# 2. init the configuration
+cd target/indexing/ || exit 1
+# "indexing" is a directory: a -f test never matches it, so the configuration
+# would be copied and initialised again on every run
+if [ ! -d indexing ]
+then
+    echo "Copying Indexing Configuration"
+    mkdir -p indexing/config
+    # "config/." copies the content of the directory with GNU and BSD cp alike;
+    # "config/" makes GNU cp create a nested indexing/config/config directory
+    cp -R ../../src/main/indexing/config/. indexing/config
+    # init missing directories and config files
+    java -jar org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar init
+fi
+
+# 3. download the files
+cd indexing/resources/ || exit 1
+# NOTE(review): "imported" is tested as a regular file (-f); if the indexing
+# tool creates it as a directory this check never skips the download - confirm
+if [ ! -f imported ]
+then
+    cd rdfdata/ || exit 1
+    echo "Downloading RDF dumps"
+    if [ ! -f sider_dump.nt.bz2 ]
+    then
+        echo "Downloading SIDER"
+        wget -c http://www4.wiwiss.fu-berlin.de/sider/sider_dump.nt.bz2
+    fi
+
+    if [ ! -f drugbank_dump.nt.bz2 ]
+    then
+        echo "Downloading DrugBank"
+        wget -c http://www4.wiwiss.fu-berlin.de/drugbank/drugbank_dump.nt.bz2
+    fi
+
+    if [ ! -f dailymed_dump.nt.bz2 ]
+    then
+        echo "Downloading Dailymed"
+        wget -c http://www4.wiwiss.fu-berlin.de/dailymed/dailymed_dump.nt.bz2
+    fi
+
+    if [ ! -f diseasome_dump.nt.bz2 ]
+    then
+        echo "Downloading Diseasome"
+        wget -c http://www4.wiwiss.fu-berlin.de/diseasome/diseasome_dump.nt.bz2
+    fi
+    cd ..
+else
+    echo "RDF data already imported"
+fi
+cd ../..
+
+# 4. Now we can start the indexing
+
+java -jar -Xmx1024m -server org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar index
+
+# finally copy the content of the dist directory to the demo's target directory
+# ("dist/." instead of "dist/" so that GNU cp does not create target/dist)
+cp -R indexing/dist/. ./..

Propchange: incubator/stanbol/trunk/demos/ehealth/index.sh
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/stanbol/trunk/demos/ehealth/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/pom.xml?rev=1302897&view=auto
==============================================================================
--- incubator/stanbol/trunk/demos/ehealth/pom.xml (added)
+++ incubator/stanbol/trunk/demos/ehealth/pom.xml Tue Mar 20 13:54:23 2012
@@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>stanbol-parent</artifactId>
+    <version>0.9.0-incubating-SNAPSHOT</version>
+    <relativePath>../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.demo.ehealth</artifactId>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Demo: ehealth</name>
+  <description>A demo on how to customise Apache Stanbol for the ehealth domain</description>
+  <!-- this module is located at trunk/demos/ehealth (plural "demos") in the repository -->
+  <scm>
+    <connection>scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/demos/ehealth</connection>
+    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/demos/ehealth</developerConnection>
+    <url>http://incubator.apache.org/stanbol</url>
+  </scm>
+
+  <build>
+    <!-- make it an OSGi bundle -->
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <!-- NOTE(review): presumably names the bundle folder (src/main/resources/config) holding the *.config files to install; confirm -->
+            <Install-Path>config</Install-Path>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <!-- AL20 licensed files. See src/main/resources/README -->
+            <exclude>src/main/resources/config/*.config</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <!-- none - config only -->
+  </dependencies>
+
+</project>

Propchange: incubator/stanbol/trunk/demos/ehealth/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html?rev=1302897&view=auto
==============================================================================
--- 
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
 (added)
+++ 
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
 Tue Mar 20 13:54:23 2012
@@ -0,0 +1,31 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The content of this page will be statically included into the top
+of the admin page.  Uncomment this as an example to see where the content
+will show up.
+
+<hr>
+<i>This line will appear before the first table</i>
+<tr>
+<td colspan="2">
+This row will be appended to the end of the first table
+</td>
+</tr>
+<hr>
+
+-->

Propchange: 
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml?rev=1302897&view=auto
==============================================================================
--- 
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
 (added)
+++ 
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
 Tue Mar 20 13:54:23 2012
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- If this file is found in the config directory, it will only be
+     loaded once at startup.  If it is found in Solr's data
+     directory, it will be re-loaded every commit.
+-->
+
+<elevate>
+<!-- <query text="foo bar"> 
+  <doc id="1" />
+  <doc id="2" />
+  <doc id="3" />
+ </query>
+ 
+ <query text="ipod">
+   <doc id="MA147LL/A" /> 
+   <doc id="IW-02" exclude="true" /> 
+ </query>
+ -->
+</elevate>

Propchange: 
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain


Reply via email to