Author: rwesten
Date: Thu Mar 29 16:36:06 2012
New Revision: 1306972
URL: http://svn.apache.org/viewvc?rev=1306972&view=rev
Log:
small improvements to the eHealth demo
Added:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/entityTypes.properties
(with props)
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ldpath-mapping.txt
(with props)
Modified:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/indexing.properties
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/mappings.txt
Added:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/entityTypes.properties
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/entityTypes.properties?rev=1306972&view=auto
==============================================================================
---
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/entityTypes.properties
(added)
+++
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/entityTypes.properties
Thu Mar 29 16:36:06 2012
@@ -0,0 +1,26 @@
+#Configuration for the FieldValueFilter
+
+#This can be used to configure specific rdf:types to be indexed. Entities with
+#other types will be filtered and not be included in the local ehealth
+#index
+
+#How to configure
+
+#The key 'field' can be used to configure the field the filters are applied
+# - 'rdf:type' is used as default for the field
+# - Only a single field is supported. However one can configure multiple
instances
+# with different configurations in the 'indexing.properties' file.
+# - It is possible to use a full URI or prefix:localname for all prefixes
registered
+# in 'org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum'
+
+#field=rdf:type
+
+#The key 'values' is used to specify the filter
+# - This is a required configuration.
+# - The value '*' deactivates filtering
+# - Multiple types are supported. Configurations are separated by ';'
+# - It is possible to use full URIs or prefix:localname for all prefixes
registered
+# in 'org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum'
+
+#This deactivates filtering
+values=*
\ No newline at end of file
Propchange:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/entityTypes.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/indexing.properties
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/indexing.properties?rev=1306972&r1=1306971&r2=1306972&view=diff
==============================================================================
---
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/indexing.properties
(original)
+++
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/indexing.properties
Thu Mar 29 16:36:06 2012
@@ -23,61 +23,16 @@ name=ehealth
# created.
description=ehealth related datasets including Drugbank, Dailymed, SIDER and
Diseasome
-# Indexing Mode dependent Configurations: (see readme.md for details)
-
# (1) Iterate over Data and lookup scores: (default)
# use the Jena TDB as source for indexing the RDF data located within
# "indexing/resource/rdfdata"
entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata
-# The EntityScore Provider needs to provide the scores for indexed entities
-# use the NoEntityScoreProvider if no scores are available
-entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider
-
-# The EntityFieldScoreProvider can be used to use the value of an property as
score
-# the property can be configured by the "field" parameter
-# Scores are parsed from numbers and strings that can be converted to numbers.
-#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityFieldScoreProvider,field:http://www.example.org/myOntology#score
-
-# The EntityIneratorToScoreProviderAdapter can be used to adapt any configured
-# "entityIdIterator" to an "entityScoreProvider". See also the comments for
-# "entityIdIterator".
-#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter
-
-# Indexing VCARD
-
-# Alternative configuration for indexing vCard files
-# change the config for the vcard indexer in the "vcard.properties" file
-#entityDataIterable=org.apache.stanbol.entityhub.indexing.source.vcard.VcardIndexingSource,config:vcard
-#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider
-
-
-
-# (2) Iterate over Entity IDs and lookup Data
-
-# First one needs to provide an EntityIterator
-# Typically the LineBasedEntityIterator implementation is used. The
configuration
-# for this implementation is typically provided in an own file. A default
-# configuration is provided by the iditerator.properties file.
-#entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,config:iditerator
-
-# Second a entityDataProvide needs to be specified. Here we use the Jena TDB
-# Note that this implementation implements both entityDataIterable AND
-# entityDataProvider.
-# RDF data needs to be located in the "indexing/resource/rdfdata"
-#
entityDataProvider=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata
-
#Score Normalizer
-# Entity Scores are normalised by the ScoreNormalizer
-# if no score normaliser is configured the scores will be used as provided by
-# the entities
-#scoreNormalizer=
-
-# ScoreNormalizer can be chained as shown by the following example
configuration
-# The score for an entity is first processed by the last normalizer
-#scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:scorerange;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser;org.apache.stanbol.entityhub.indexing.core.normaliser.MinScoreNormalizer,config:minscore
+# No entity scores available
+entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider
# Different Implementations:
# - RangeNormaliser: allows to define a range for score values. see
@@ -88,20 +43,37 @@ entityScoreProvider=org.apache.stanbol.e
# lower than the configured minimum. see "minscore.properties" for possible
# configurations.
-
# Entity Processor
-# Currently the only available implementation is the FiledMapperProcessor.
-entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+# Multiple Entity processors can be used for indexing entities. They are
separated by ';'
+# and are executed in the order of definition.
+
+# FiledMapperProcessor:
+#
+#
entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+#
+# This processor ensures that "field mappings" are executed while indexing
entities.
# By default it will use the mappings configured by the "fieldConfiguraton"
# property. To use other mappings one can use the "mappings" parameter (e.g.
# mappings:otherMappings.txt
-# A default mapping configuration is provided. This file also includes a lot of
-# comments.
+# FieldValueFilter
+#
+#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes
+#
+# This allows defining a field and values that are used to filter entities.
Only Entities
+# that do have one of the defined values as actual value of the defined field
will
+# get indexed. This is typically used to filter entities by rdf:type, but can
be used
+# for any URI property. See the default entityTypes.properties file for more
information
-# to deactivate EntityProcessing one can use
+# EmptyProcessor
+#
#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.EmptyProcessor
+#
+# This processor can be used to deactivate EntityProcessing
+
+# Default Entity Processor configuration
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes;org.apache.stanbol.entityhub.indexing.core.processor.LdpathProcessor,ldpath:ldpath-mapping.txt,append:true;org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
# Index Field Configuration
@@ -138,7 +110,7 @@ indexingDestination=org.apache.stanbol.e
# on a referenced site. If not specified requests for any entity will be
# forwarded to this referenced site.
# use ';' to seperate multiple values
-#org.apache.stanbol.entityhub.site.entityPrefix=http://example.org/resource;urn:mycompany:
+org.apache.stanbol.entityhub.site.entityPrefix=http://www4.wiwiss.fu-berlin.de/drugbank/;http://www4.wiwiss.fu-berlin.de/dailymed/;http://www4.wiwiss.fu-berlin.de/sider/;http://www4.wiwiss.fu-berlin.de/diseasome/;
# Configuration the remote Service
# If the indexed data are also available remotly (e.g. by a Linked data
endpoint)
@@ -167,16 +139,17 @@ org.apache.stanbol.entityhub.site.fieldM
# License(s)
-# Add here the name and URLs of the license to be used for all entities
-# provided by this referenced site
-# NOTE: licenseName and licenseUrl MUST use the ordering as below!
-# This example shows dual licensing with "cc by-sa" and GNU
+
+# The four datasets used by this demo use slightly different licenses
+# but they are at least public domain and non-commercial. Users that
+# want to use this commercially will need to look up requirements first.
org.apache.stanbol.entityhub.site.licenseName=Public domain, non-commercial
#org.apache.stanbol.entityhub.site.licenseUrl=http://creativecommons.org/licenses/by-sa/3.0/;http://www.gnu.org/licenses/fdl.html
# Attribution
-# Some Licenses require attributions. This properties can be used to provide a
-# link to the site with the attribution and the attribution text
+
+# this includes attributions to all groups involved in the creation of any
+# of the four datasets.
org.apache.stanbol.entityhub.site.attributionUrl=http://www.drugbank.ca/about;http://dailymed.nlm.nih.gov/dailymed/about.cfm;http://sideeffects.embl.de/about;http://www.barabasilab.com/pubs/CCNR-ALB_Publications/200705-14_PNAS-HumanDisease/Suppl/index.htm
org.apache.stanbol.entityhub.site.attribution=To the drugbank, dailymed, SIDER
and human disease network teams.
Added:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ldpath-mapping.txt
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ldpath-mapping.txt?rev=1306972&view=auto
==============================================================================
---
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ldpath-mapping.txt
(added)
+++
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ldpath-mapping.txt
Thu Mar 29 16:36:06 2012
@@ -0,0 +1 @@
+skos:prefLabel = .[rdf:type is diseasome:genes]/rdfs:label;
\ No newline at end of file
Propchange:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/ldpath-mapping.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified:
incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/mappings.txt
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/mappings.txt?rev=1306972&r1=1306971&r2=1306972&view=diff
==============================================================================
--- incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/mappings.txt
(original)
+++ incubator/stanbol/trunk/demos/ehealth/src/main/indexing/config/mappings.txt
Thu Mar 29 16:36:06 2012
@@ -63,6 +63,35 @@ dc-elements:type > dc:type
#also use dc-elements:title as label
dc-elements:title > rdfs:label
+# --- Simple Knowledge Organization System (SKOS) ---
+
+# A common data model for sharing and linking knowledge organization systems
+# via the Semantic Web. Typically used to encode controlled vocabularies as
+# a thesaurus
+skos:*
+
+# copy all SKOS labels (preferred, alternative and hidden) over to rdfs:label
+skos:prefLabel > rdfs:label
+skos:altLabel > rdfs:label
+skos:hiddenLabel > rdfs:label
+
+# copy values of **Match relations to the corresponding related, broader and
narrower
+skos:relatedMatch > skos:related
+skos:broadMatch > skos:broader
+skos:narrowMatch > skos:narrower
+
+#similar mappings for transitive variants are not contained, because transitive
+#reasoning is not directly supported by the Entityhub.
+
+# Some SKOS thesauri do use "skos:broaderTransitive" and
"skos:narrowerTransitive"
+# however such properties are only intended to be used by reasoners to
+# calculate transitive closures over broader/narrower hierarchies.
+# see http://www.w3.org/TR/skos-reference/#L2413 for details
+# to correct such cases we will copy transitive relations to their counterpart
+skos:narrowerTransitive > skos:narrower
+skos:broaderTransitive > skos:broader
+
+
# --- E-HEALTH specific ---
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -