Author: rwesten
Date: Tue Mar 20 13:54:23 2012
New Revision: 1302897
URL: http://svn.apache.org/viewvc?rev=1302897&view=rev
Log:
initial commit of an eHealth domain demo. See README.md for details
Added:
incubator/stanbol/trunk/demos/ehealth/ (with props)
incubator/stanbol/trunk/demos/ehealth/README.md
incubator/stanbol/trunk/demos/ehealth/index.sh (with props)
incubator/stanbol/trunk/demos/ehealth/pom.xml (with props)
incubator/stanbol/trunk/demos/ehealth/src/
incubator/stanbol/trunk/demos/ehealth/src/main/
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/mapping-FoldToASCII.txt
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/mapping-ISOLatin1Accent.txt
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/protwords.txt
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/schema.xml
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/scripts.conf
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/solrconfig.xml
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/spellings.txt
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/stopwords.txt
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/stopwords_de.txt
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/synonyms.txt
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/synonyms_de.txt
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/fieldboosts.properties
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/indexing.properties
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/mappings.txt
incubator/stanbol/trunk/demos/ehealth/src/main/resources/
incubator/stanbol/trunk/demos/ehealth/src/main/resources/config/
incubator/stanbol/trunk/demos/ehealth/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-ehealth.config
incubator/stanbol/trunk/demos/ehealth/src/main/resources/config/org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine-drugid.config
incubator/stanbol/trunk/demos/ehealth/src/main/resources/config/org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine-ehealth.config
Propchange: incubator/stanbol/trunk/demos/ehealth/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Mar 20 13:54:23 2012
@@ -0,0 +1 @@
+target
Added: incubator/stanbol/trunk/demos/ehealth/README.md
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/README.md?rev=1302897&view=auto
==============================================================================
--- incubator/stanbol/trunk/demos/ehealth/README.md (added)
+++ incubator/stanbol/trunk/demos/ehealth/README.md Tue Mar 20 13:54:23 2012
@@ -0,0 +1,71 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+
+Apache Stanbol Demo: eHealth
+============================
+
+This module provides a demo on how to customize Apache Stanbol to the ehealth
domain.
+
+This demo uses the following datasets:
+
+* __[Dailymed](http://dailymed.nlm.nih.gov/dailymed/)__([RDF
version](http://www4.wiwiss.fu-berlin.de/dailymed/)): Published by the
National Library of Medicine, this dataset provides high quality information
about marketed drugs.
+* __[SIDER](http://sideeffects.embl.de/)__([RDF
version](http://www4.wiwiss.fu-berlin.de/sider)): SIDER contains information on
marketed drugs and their adverse effects. The information is extracted from
public documents and package inserts.
+*
__[Diseasome](http://www.nd.edu/~networks/Publication%20Categories/03%20Journal%20Articles/Biology/HumanDisease_PNAS-V104-p8685(14My07).pdf)__([RDF
version](http://www4.wiwiss.fu-berlin.de/diseasome)): The human disease
network publishes a network of 4,300 disorders and disease genes linked by
known disorder-gene associations for exploring all known phenotype and disease
gene associations, indicating the common genetic origin of many diseases.
+* __[DrugBank](http://www.drugbank.ca/)__([RDF
version](http://www4.wiwiss.fu-berlin.de/drugbank)): A repository of almost
5000 FDA-approved small molecule and biotech drugs. It contains detailed
information about drugs including chemical, pharmacological and pharmaceutical
data; along with comprehensive drug target data such as sequence, structure,
and pathway information.
+
+Note that the RDF versions of the datasets used by this demo are hosted by
the [Freie Universität
Berlin](http://www.wiwiss.fu-berlin.de/en/institute/pwo/bizer/).
+
+This demo shows how to
+
+1. import the listed datasets to the Apache Stanbol Entityhub by using the
indexing utilities provided by the Stanbol Entityhub. Including the usage
examples for
+ * schema mappings during indexing (see
"src/main/indexing/config/mappings.txt")
+ * customized Apache Solr schemas to control how fields are indexed (see
solr core configuration at "src/main/indexing/config/ehealth")
+2. "install" the indexed data set - make them available via the Apache Stanbol
Entityhub
+3. configure the Stanbol Enhancer to extract ehealth related entities
+ * based on their labels (fields mapped to rdfs:label)
+ * Drugs based on their various IDs (fields mapped to skos:notation)
+
+## Usage
+
+To install the demo you will need to do the following steps
+
+1. execute "__mvn install__": This will create the bundle
"org.apache.stanbol.demo.ehealth-*.jar" in the "./target" folder. This bundle
includes the configuration for Apache Stanbol Components:
+ * Two configurations for the
[KeywordLinkingEngine](http://incubator.apache.org/stanbol/docs/trunk/enhancer/engines/keywordlinkingengine.html).
One that is configured to extract Entities of the above datasets based on
their labels and another one that is configured to extract Drugs based on
their IDs.
+ * A special
[EnhancementChain](http://incubator.apache.org/stanbol/docs/trunk/enhancer/chains/)
for processing ehealth data.
+2. execute "__./index.sh__": This shell script automates the steps described
in detail by the [Working with local Entities
Guide](http://incubator.apache.org/stanbol/docs/trunk/customvocabulary.html).
This includes the following steps.
+ * assemble the generic RDF indexing tool
({stanbol}/entityhub/indexing/genericrdf)
+ * copy the configuration from "./src/main/indexing/config" to the target
directory used for indexing
+ * initialize missing configs by calling "java -jar
org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar
init"
+ * download the datasets listed above to
"./target/indexing/indexing/resources/rdfdata/"
+ * index the datasets
+ * copy the results "org.apache.stanbol.data.site.ehealth-1.0.0.jar" and
"ehealth.solrindex.zip" to the "./target" folder
+3. __Install__ the data to a running Stanbol instance: Both the Stable and the
Full launcher can be used as base for this demo. The following steps are
required to install the demo
+ 1. copy "./target/ehealth.solrindex.zip" to
"{stanbol-workingdir}/sling/datafiles"
+ 2. install both bundles
"./target/org.apache.stanbol.data.site.ehealth-1.0.0.jar" and
"./target/org.apache.stanbol.demo.ehealth-*.jar" to your Stanbol instance.
Users can use the [Apache Felix Web
Console](http://localhost:8080/system/console/bundles)(url:
http://{host}:{port}/system/console/bundles) for this task.
+ 3. wait a minute until Stanbol has installed the data from the
"ehealth.solrindex.zip" file
+
+
+After that you will be able to
+
+* use the datasets with the [Stanbol
Entityhub](http://localhost:8080/entityhub/site/ehealth/)(url:
http://{host}:{port}/{alias}/entityhub/site/ehealth)
+* extract ehealth related terms by using the [Stanbol
Enhancer](http://localhost:8080/enhancer/chain/ehealth) (url:
http://{host}:{port}/{alias}/enhancer/chain/ehealth)
+
+
+## Background information about this demo
+
+TODO!!
\ No newline at end of file
Added: incubator/stanbol/trunk/demos/ehealth/index.sh
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/index.sh?rev=1302897&view=auto
==============================================================================
--- incubator/stanbol/trunk/demos/ehealth/index.sh (added)
+++ incubator/stanbol/trunk/demos/ehealth/index.sh Tue Mar 20 13:54:23 2012
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 1. build the indexing tool and copy it to the target/indexing directory
+
+if [ ! -d target/indexing ] # -d, not -f: target/indexing is a directory
+then
+ mkdir -p target/indexing
+fi
+
+if [ ! -f
target/indexing/org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar
]
+then
+ echo "Prepairing Indexing Tool"
+ cd ../../entityhub/indexing/genericrdf/
+ if [ ! -f
target/org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar
]
+ then
+ mvn assembly:single # only assemble when no jar-with-dependencies exists yet
+ fi
+ cd ../../../demos/ehealth/
+ cp
../../entityhub/indexing/genericrdf/target/org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar
target/indexing/
+else
+ echo "Indexing Tool present ... skip assembling a new version"
+fi
+
+# 2. init the configuration
+cd target/indexing/
+if [ ! -f indexing ]
+then
+ echo "Copying Indexing Configuration"
+ mkdir -p indexing/config
+ cp -R ../../src/main/indexing/config/ indexing/config
+ # init missing directories and config files
+ java -jar
org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar
init
+fi
+
+# 3. download the files
+cd indexing/resources/
+if [ ! -f imported ]
+then
+ cd rdfdata/
+ echo "Downloading RDF dumps"
+ if [ ! -f sider_dump.nt.bz2 ]
+ then
+ echo "Downloading SIDER"
+ wget -c http://www4.wiwiss.fu-berlin.de/sider/sider_dump.nt.bz2
+ fi
+
+ if [ ! -f drugbank_dump.nt.bz2 ]
+ then
+ echo "Downloading DrugBank"
+ wget -c http://www4.wiwiss.fu-berlin.de/drugbank/drugbank_dump.nt.bz2
+ fi
+
+ if [ ! -f dailymed_dump.nt.bz2 ]
+ then
+ echo "Downloading Dailymed"
+ wget -c http://www4.wiwiss.fu-berlin.de/dailymed/dailymed_dump.nt.bz2
+ fi
+
+ if [ ! -f diseasome_dump.nt.bz2 ]
+ then
+ echo "Downloading Diseasome"
+ wget -c http://www4.wiwiss.fu-berlin.de/diseasome/diseasome_dump.nt.bz2
+ fi
+ cd ..
+else
+ echo "RDF data already imported"
+fi
+cd ../..
+
+# 3 Now we can start the indexing
+
+java -jar -Xmx1024m -server
org.apache.stanbol.entityhub.indexing.genericrdf-*-jar-with-dependencies.jar
index
+
+# finally copy the dist to the /target directory
+cp -R indexing/dist/ ./..
Propchange: incubator/stanbol/trunk/demos/ehealth/index.sh
------------------------------------------------------------------------------
svn:executable = *
Added: incubator/stanbol/trunk/demos/ehealth/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/pom.xml?rev=1302897&view=auto
==============================================================================
--- incubator/stanbol/trunk/demos/ehealth/pom.xml (added)
+++ incubator/stanbol/trunk/demos/ehealth/pom.xml Tue Mar 20 13:54:23 2012
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+ <!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the
+ NOTICE file distributed with this work for additional information
regarding copyright ownership. The ASF
+ licenses this file to You under the Apache License, Version 2.0 (the
"License"); you may not use this file
+ except in compliance with the License. You may obtain a copy of the
License at
+
+ http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
law or agreed to in writing,
+ software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR
+ CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing
+ permissions and limitations under the License.
+ -->
+<project>
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>stanbol-parent</artifactId>
+ <version>0.9.0-incubating-SNAPSHOT</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.demo.ehealth</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Demo: ehealth</name>
+ <description>A demo on how to customise Apache Stanbol for the ehealth
domain</description>
+ <scm>
+ <connection>
+
scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/demos/ehealth
+ </connection>
+ <developerConnection>
+
scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/demos/ehealth
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol</url>
+ </scm>
+
+ <build>
+ <!-- make it an OSGi bundle; Install-Path names the bundle folder holding the configs -->
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Install-Path>config</Install-Path>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- AL20 licensed files. See src/main/resources/README -->
+ <exclude>src/main/resources/config/*.config</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <!-- none - config only -->
+ </dependencies>
+
+</project>
Propchange: incubator/stanbol/trunk/demos/ehealth/pom.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html?rev=1302897&view=auto
==============================================================================
---
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
(added)
+++
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
Tue Mar 20 13:54:23 2012
@@ -0,0 +1,31 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The content of this page will be statically included into the top
+of the admin page. Uncomment this as an example to see where the content
+will show up.
+
+<hr>
+<i>This line will appear before the first table</i>
+<tr>
+<td colspan="2">
+This row will be appended to the end of the first table
+</td>
+</tr>
+<hr>
+
+-->
Propchange:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/admin-extra.html
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml?rev=1302897&view=auto
==============================================================================
---
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
(added)
+++
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
Tue Mar 20 13:54:23 2012
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- If this file is found in the config directory, it will only be
+ loaded once at startup. If it is found in Solr's data
+ directory, it will be re-loaded every commit.
+-->
+
+<elevate>
+<!-- <query text="foo bar">
+ <doc id="1" />
+ <doc id="2" />
+ <doc id="3" />
+ </query>
+
+ <query text="ipod">
+ <doc id="MA147LL/A" />
+ <doc id="IW-02" exclude="true" />
+ </query>
+ -->
+</elevate>
Propchange:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ehealth/conf/elevate.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain