svn commit: r1692320 [2/2] - in /stanbol/trunk: data/ data/sites/entity-coref-dbpedia/ data/sites/entity-coref-dbpedia/dbpedia_yago_classes/ data/sites/entity-coref-dbpedia/src/ data/sites/entity-coref-dbpedia/src/main/ data/sites/entity-coref-dbpedia/...

cpetroaca Wed, 22 Jul 2015 11:59:51 -0700

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/EntityCoReferenceEngine.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/EntityCoReferenceEngine.java?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/EntityCoReferenceEngine.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/EntityCoReferenceEngine.java
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.entitycoreference;
+
+import static 
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.NounPhrase;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.impl.CoreferenceFinder;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.impl.NounPhraseFilterer;
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.Section;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.entityhub.servicesapi.Entityhub;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteManager;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This engine extracts references in the given text of noun phrases which 
point to NERs. The coreference is
+ * performed based on matching several of the named entity's dbpedia/yago 
properties to the noun phrase
+ * tokens.
+ * 
+ * TODO - Be able to detect possessive coreferences such as Germany's prime 
minister 
+ * TODO - be able to detect products and their developer such as Iphone 7 and 
Apple's new device. 
+ * TODO - provide the ability via config for the user to also allow 
coreferencing of 1 word noun phrases based 
+ * solely on comparison with entity class type?
+ * 
+ * @author Cristian Petroaca
+ * 
+ */
+@Component(immediate = true, metatype = true)
+@Service(value = EnhancementEngine.class)
+@Properties(value = {
+                     @Property(name = EnhancementEngine.PROPERTY_NAME, value = 
"entity-coreference"),
+                     @Property(name = 
EntityCoReferenceEngine.CONFIG_LANGUAGES, value = "en"),
+                     @Property(name = 
EntityCoReferenceEngine.REFERENCED_SITE_ID, value = "entity-coref-dbpedia"),
+                     @Property(name = EntityCoReferenceEngine.ENTITY_URI_BASE, 
value = "http://dbpedia.org/resource/";),
+                     @Property(name = EntityCoReferenceEngine.MAX_DISTANCE, 
intValue = Constants.MAX_DISTANCE_DEFAULT_VALUE),
+                                        @Property(name = 
EntityCoReferenceEngine.SPATIAL_ATTR_FOR_PERSON, value = 
Constants.DEFAULT_SPATIAL_ATTR_FOR_PERSON),
+                                        @Property(name = 
EntityCoReferenceEngine.SPATIAL_ATTR_FOR_ORGANIZATION, value = 
Constants.DEFAULT_SPATIAL_ATTR_FOR_ORGANIZATION),
+                                        @Property(name = 
EntityCoReferenceEngine.SPATIAL_ATTR_FOR_PLACE, value = 
Constants.DEFAULT_SPATIAL_ATTR_FOR_PLACE),
+                                        @Property(name = 
EntityCoReferenceEngine.ORG_ATTR_FOR_PERSON, value = 
Constants.DEFAULT_ORG_ATTR_FOR_PERSON),
+                                        @Property(name = 
EntityCoReferenceEngine.ENTITY_CLASSES_TO_EXCLUDE, value = 
Constants.DEFAULT_ENTITY_CLASSES_TO_EXCLUDE)})
+public class EntityCoReferenceEngine extends 
AbstractEnhancementEngine<RuntimeException,RuntimeException>
+        implements EnhancementEngine, ServiceProperties {
+
+    private static final Integer ENGINE_ORDERING = 
ServiceProperties.ORDERING_POST_PROCESSING + 91;
+
+    /**
+     * Language configuration. Takes a list of ISO language codes of supported 
languages. Currently supported
+     * are the languages given as default value.
+     */
+    protected static final String CONFIG_LANGUAGES = 
"enhancer.engine.entitycoreference.languages";
+
+    /**
+     * Referenced site configuration. Defaults to dbpedia.
+     */
+    protected static final String REFERENCED_SITE_ID = 
"enhancer.engine.entitycoreference.referencedSiteId";
+
+    /**
+     * 
+     */
+    protected static final String ENTITY_URI_BASE = 
"enhancer.engine.entitycoreference.entity.uri.base";
+    
+    /**
+     * Maximum sentence distance between the ner and the noun phrase which 
mentions it. -1 means no distance
+     * constraint.
+     */
+    protected static final String MAX_DISTANCE = 
"enhancer.engine.entitycoreference.maxDistance";
+
+    /**
+     * Attributes used for spatial coreference when dealing with a person 
entity.
+     */
+    protected static final String SPATIAL_ATTR_FOR_PERSON = 
"enhancer.engine.entitycoreference.spatial.attr.person";
+    
+    /**
+     * Attributes used for spatial coreference when dealing with an 
organization entity.
+     */
+    protected static final String SPATIAL_ATTR_FOR_ORGANIZATION = 
"enhancer.engine.entitycoreference.spatial.attr.org";
+    
+    /**
+     * Attributes used for spatial coreference when dealing with a place 
entity.
+     */
+    protected static final String SPATIAL_ATTR_FOR_PLACE = 
"enhancer.engine.entitycoreference.spatial.attr.place";
+    
+    /**
+     * Attributes used for organisational membership coreference when dealing 
with a person entity.
+     */
+    protected static final String ORG_ATTR_FOR_PERSON = 
"enhancer.engine.entitycoreference.org.attr.person";
+    
+    /**
+     * Entity classes which will be excluded when doing the entity class type 
matching 
+     * because they are too general in nature.
+     */
+    protected static final String ENTITY_CLASSES_TO_EXCLUDE = 
"enhancer.engine.entitycoreference.entity.classes.excluded";
+    
+    /**
+     * Logger
+     */
+    private final Logger log = 
LoggerFactory.getLogger(EntityCoReferenceEngine.class);
+
+    /**
+     * Service of the Entityhub that manages all the active referenced Site. 
This Service is used to lookup
+     * the configured Referenced Site when we need to enhance a content item.
+     */
+    @Reference
+    protected SiteManager siteManager;
+
+    /**
+     * Used to lookup Entities if the {@link #REFERENCED_SITE_ID} property is 
set to "entityhub" or "local"
+     */
+    @Reference
+    protected Entityhub entityhub;
+
+    /**
+     * Specialized class which filters out bad noun phrases based on the 
language.
+     */
+    private NounPhraseFilterer nounPhraseFilterer;
+
+    /**
+     * Performs the logic needed to find corefs based on the NERs and noun 
phrases in the text.
+     */
+    private CoreferenceFinder corefFinder;
+
+    @SuppressWarnings("unchecked")
+    @Activate
+    protected void activate(ComponentContext ctx) throws 
ConfigurationException {
+        super.activate(ctx);
+
+        Dictionary<String,Object> config = ctx.getProperties();
+
+        /* Step 1 - initialize the {@link NounPhraseFilterer} with the 
language config */
+        String languages = (String) config.get(CONFIG_LANGUAGES);
+
+        if (languages == null || languages.isEmpty()) {
+            throw new ConfigurationException(CONFIG_LANGUAGES,
+                    "The Languages Config is a required Parameter and MUST NOT 
be NULL or an empty String!");
+        }
+
+        nounPhraseFilterer = new NounPhraseFilterer(languages.split(","));
+
+        /* Step 2 - initialize the {@link CoreferenceFinder} */
+        String referencedSiteID = null;
+        Object referencedSiteIDfromConfig = config.get(REFERENCED_SITE_ID);
+
+        if (referencedSiteIDfromConfig == null) {
+            throw new ConfigurationException(REFERENCED_SITE_ID,
+                    "The ID of the Referenced Site is a required Parameter and 
MUST NOT be NULL!");
+        }
+
+        referencedSiteID = referencedSiteIDfromConfig.toString();
+        if (referencedSiteID.isEmpty()) {
+            throw new ConfigurationException(REFERENCED_SITE_ID,
+                    "The ID of the Referenced Site is a required Parameter and 
MUST NOT be an empty String!");
+        }
+
+        if (Entityhub.ENTITYHUB_IDS.contains(referencedSiteID.toLowerCase())) {
+            log.debug("Init NamedEntityTaggingEngine instance for the 
Entityhub");
+            referencedSiteID = null;
+        }
+
+        int maxDistance;
+        Object maxDistanceFromConfig = config.get(MAX_DISTANCE);
+
+        if (maxDistanceFromConfig == null) {
+            maxDistance = Constants.MAX_DISTANCE_DEFAULT_VALUE;
+        } else if (maxDistanceFromConfig instanceof Number) {
+            maxDistance = ((Number) maxDistanceFromConfig).intValue();
+        } else {
+            try {
+                maxDistance = 
Integer.parseInt(maxDistanceFromConfig.toString());
+            } catch (NumberFormatException nfe) {
+                throw new ConfigurationException(MAX_DISTANCE, "The Max 
Distance parameter must be a number");
+            }
+        }
+
+        if (maxDistance < -1) {
+            throw new ConfigurationException(MAX_DISTANCE,
+                    "The Max Distance parameter must not be smaller than -1");
+        }
+        
+        String entityUriBase = (String) config.get(ENTITY_URI_BASE);
+        if (entityUriBase == null || entityUriBase.isEmpty()) {
+               throw new ConfigurationException(ENTITY_URI_BASE, "The Entity 
Uri Base parameter cannot be empty");
+        }
+        
+        String spatialAttrForPerson = (String) 
config.get(SPATIAL_ATTR_FOR_PERSON);
+        String spatialAttrForOrg = (String) 
config.get(SPATIAL_ATTR_FOR_ORGANIZATION);
+        String spatialAttrForPlace = (String) 
config.get(SPATIAL_ATTR_FOR_PLACE);
+        String orgAttrForPerson = (String) config.get(ORG_ATTR_FOR_PERSON);
+        String entityClassesToExclude = (String) 
config.get(ENTITY_CLASSES_TO_EXCLUDE);
+        
+        corefFinder = new CoreferenceFinder(languages.split(","), siteManager, 
entityhub, referencedSiteID,
+                maxDistance, entityUriBase, spatialAttrForPerson, 
spatialAttrForOrg, 
+                spatialAttrForPlace, orgAttrForPerson, entityClassesToExclude);
+
+        log.info("activate {}[name:{}]", getClass().getSimpleName(), 
getName());
+    }
+
+    @Override
+    public Map<String,Object> getServiceProperties() {
+        return 
Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
+            (Object) ENGINE_ORDERING));
+    }
+
+    @Override
+    public int canEnhance(ContentItem ci) throws EngineException {
+        String language = getLanguage(this, ci, false);
+        if (language == null) {
+            log.debug("Engine {} ignores ContentItem {} becuase language {} is 
not detected.",
+                new Object[] {getName(), ci.getUri(), language});
+            return CANNOT_ENHANCE;
+        }
+
+        if (!nounPhraseFilterer.supportsLanguage(language)) {
+            log.debug("Engine {} does not support language {}.", new Object[] 
{getName(), language});
+            return CANNOT_ENHANCE;
+        }
+
+        return ENHANCE_SYNCHRONOUS;
+    }
+
+    @Override
+    public void computeEnhancements(ContentItem ci) throws EngineException {
+        /*
+         * Step 1 - Build the NER list and the noun phrase list.
+         * 
+         * TODO - the noun phrases need to be lemmatized.
+         */
+        Map<Integer,List<Span>> ners = new HashMap<Integer,List<Span>>();
+        List<NounPhrase> nounPhrases = new ArrayList<NounPhrase>();
+        extractNersAndNounPhrases(ci, ners, nounPhrases);
+
+        /*
+         * If there are no NERs to reference there's nothing to do but exit.
+         */
+        if (ners.size() == 0) {
+            log.info("Did not find any NERs for which to do the 
coreferencing");
+            return;
+        }
+
+        /*
+         * Step 2 - Filter out bad noun phrases.
+         */
+        String language = getLanguage(this, ci, false);
+        if (language == null) {
+            log.info("Could not detect the language of the text");
+            return;
+        }
+
+        nounPhraseFilterer.filter(nounPhrases, language);
+
+        /*
+         * If there are no good noun phrases there's nothing to do but exit.
+         */
+        if (nounPhrases.size() == 0) {
+            log.info("Did not find any noun phrases with which to do the 
coreferencing");
+            return;
+        }
+
+        /*
+         * Step 3 - Extract corefs and write them as {@link 
NlpAnnotations.COREF_ANNOTATION}s in the {@link
+         * Span}s
+         */
+        corefFinder.extractCorefs(ners, nounPhrases, language);
+    }
+
+    @Deactivate
+    protected void deactivate(ComponentContext ctx) {
+        log.info("deactivate {}[name:{}]", getClass().getSimpleName(), 
getName());
+
+        nounPhraseFilterer = null;
+        corefFinder = null;
+
+        super.deactivate(ctx);
+    }
+
+    /**
+     * Extracts the NERs and the noun phrases from the given text and puts 
them in the given lists.
+     * 
+     * @param ci
+     * @param ners
+     * @param nounPhrases
+     */
+    private void extractNersAndNounPhrases(ContentItem ci,
+                                           Map<Integer,List<Span>> ners,
+                                           List<NounPhrase> nounPhrases) {
+        AnalysedText at = NlpEngineHelper.getAnalysedText(this, ci, true);
+        Iterator<? extends Section> sections = at.getSentences();
+        if (!sections.hasNext()) { // process as single sentence
+            sections = Collections.singleton(at).iterator();
+        }
+
+        int sentenceCnt = 0;
+        while (sections.hasNext()) {
+            sentenceCnt++;
+            Section section = sections.next();
+            List<NounPhrase> sectionNounPhrases = new ArrayList<NounPhrase>();
+            List<Span> sectionNers = new ArrayList<Span>();
+
+            Iterator<Span> chunks = 
section.getEnclosed(EnumSet.of(SpanTypeEnum.Chunk));
+            while (chunks.hasNext()) {
+                Span chunk = chunks.next();
+
+                Value<NerTag> ner = 
chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION);
+                if (ner != null) {
+                    sectionNers.add(chunk);
+                }
+
+                Value<PhraseTag> phrase = 
chunk.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION);
+                if (phrase != null && phrase.value().getCategory() == 
LexicalCategory.Noun) {
+                    sectionNounPhrases.add(new NounPhrase(chunk, sentenceCnt));
+                }
+            }
+
+            for (NounPhrase nounPhrase : sectionNounPhrases) {
+                Iterator<Span> tokens = 
section.getEnclosed(EnumSet.of(SpanTypeEnum.Token));
+
+                while (tokens.hasNext()) {
+                    Span token = tokens.next();
+
+                    if (nounPhrase.containsSpan(token)) {
+                        nounPhrase.addToken(token);
+                    }
+                }
+
+                for (Span sectionNer : sectionNers) {
+                    if (nounPhrase.containsSpan(sectionNer)) {
+                        nounPhrase.addNerChunk(sectionNer);
+                    }
+                }
+            }
+
+            nounPhrases.addAll(sectionNounPhrases);
+
+            if (!sectionNers.isEmpty()) {
+                ners.put(sentenceCnt, sectionNers);
+            }
+        }
+    }
+}


Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/datamodel/NounPhrase.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/datamodel/NounPhrase.java?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/datamodel/NounPhrase.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/datamodel/NounPhrase.java
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.entitycoreference.datamodel;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.stanbol.enhancer.nlp.model.Span;
+
+/**
+ * Encapsulates span and sentence information about a noun phrase: the chunk that represents it, the
+ * tokens and NER chunks that were added to it, and the index of the sentence it was found in.
+ *
+ * @author Cristian Petroaca
+ *
+ */
+public class NounPhrase {
+    /**
+     * The {@link Span} which represents this noun phrase.
+     */
+    private final Span chunk;
+
+    /*
+     * TODO - should use Set instead?
+     */
+    /**
+     * The {@link Span}s - tokens - which make up this noun phrase.
+     */
+    private final List<Span> tokens;
+
+    /**
+     * The {@link Span}s contained in this noun phrase which represent Ners.
+     */
+    private final List<Span> nerChunks;
+
+    /**
+     * The sentence index in which this noun phrase is found.
+     */
+    private final int sentenceNo;
+
+    /**
+     * Creates a noun phrase without tokens and NER chunks.
+     *
+     * @param chunk
+     *            the span representing the noun phrase, must not be {@code null}
+     * @param sentenceNo
+     *            the index of the sentence containing the noun phrase
+     * @throws IllegalArgumentException
+     *             if {@code chunk} is {@code null}
+     */
+    public NounPhrase(Span chunk, int sentenceNo) {
+        if (chunk == null) {
+            throw new IllegalArgumentException("Chunk cannot be null");
+        }
+
+        this.chunk = chunk;
+        this.tokens = new ArrayList<Span>();
+        this.nerChunks = new ArrayList<Span>();
+        this.sentenceNo = sentenceNo;
+    }
+
+    /**
+     * Gets the chunk representing this noun phrase.
+     *
+     * @return the chunk passed to the constructor, never {@code null}
+     */
+    public Span getChunk() {
+        return chunk;
+    }
+
+    /**
+     * Adds a new token which is found in this noun phrase.
+     *
+     * @param token
+     *            the token to append; it is not checked against the boundaries of the chunk
+     */
+    public void addToken(Span token) {
+        /*
+         * TODO - validate token boundaries within this noun phrase.
+         */
+        tokens.add(token);
+    }
+
+    /**
+     * Gets the list of tokens which make up this noun phrase.
+     *
+     * @return the internal (live, modifiable) token list
+     */
+    public List<Span> getTokens() {
+        return tokens;
+    }
+
+    /**
+     * Adds a new NER chunk which is found within this noun phrase.
+     *
+     * @param chunk
+     *            the NER chunk to append; it is not checked against the boundaries of this noun phrase
+     */
+    public void addNerChunk(Span chunk) {
+        /*
+         * TODO - validate NER boundaries within this noun phrase.
+         */
+        nerChunks.add(chunk);
+    }
+
+    /**
+     * Gets the list of NERs within this noun phrase.
+     *
+     * @return the internal (live, modifiable) list of NER chunks
+     */
+    public List<Span> getNerChunks() {
+        return nerChunks;
+    }
+
+    /**
+     * Determines whether this noun phrase's {@link Span} contains the given {@link Span}.
+     *
+     * @param span
+     *            the span to test
+     * @return {@code true} if the start and end of {@code span} both lie within the start and end of this
+     *         noun phrase's chunk (boundaries included)
+     */
+    public boolean containsSpan(Span span) {
+        return span.getStart() >= chunk.getStart() && span.getEnd() <= chunk.getEnd();
+    }
+
+    /**
+     * Determines whether this noun phrase has NERs.
+     *
+     * @return {@code true} if at least one NER chunk was added
+     */
+    public boolean hasNers() {
+        return !nerChunks.isEmpty();
+    }
+
+    /**
+     * Returns the sentence index in which this noun phrase is found.
+     *
+     * @return the sentence index passed to the constructor
+     */
+    public int getSentenceNo() {
+        return this.sentenceNo;
+    }
+
+    /**
+     * Computes the hash code from the same fields that {@link #equals(Object)} compares.
+     */
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + chunk.hashCode();
+        result = prime * result + tokens.hashCode();
+        result = prime * result + nerChunks.hashCode();
+        result = prime * result + sentenceNo;
+
+        return result;
+    }
+
+    /**
+     * Two noun phrases are equal if they have equal chunks, tokens, NER chunks and sentence index.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) return true;
+        if (obj == null) return false;
+        if (getClass() != obj.getClass()) return false;
+
+        NounPhrase other = (NounPhrase) obj;
+
+        return chunk.equals(other.chunk) && tokens.equals(other.tokens) && nerChunks.equals(other.nerChunks)
+               && sentenceNo == other.sentenceNo;
+    }
+}

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/datamodel/PlaceAdjectival.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/datamodel/PlaceAdjectival.java?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/datamodel/PlaceAdjectival.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/datamodel/PlaceAdjectival.java
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.entitycoreference.datamodel;
+
+import org.apache.clerezza.rdf.core.UriRef;
+
+/**
+ * Represents a place adjectival inside a {@code Span}: its start and end index plus the URI of the place
+ * it points to.
+ *
+ * @author Cristian Petroaca
+ *
+ */
+public class PlaceAdjectival {
+    /**
+     * The start index in the {@code Span}.
+     */
+    private final int startIdx;
+
+    /**
+     * The end index in the {@code Span}.
+     */
+    private final int endIdx;
+
+    /**
+     * The {@link UriRef} in the {@code SiteManager} or {@code Entityhub} that this place adjectival points
+     * to.
+     */
+    private final UriRef placeUri;
+
+    /**
+     * Creates a place adjectival.
+     *
+     * @param startIdx
+     *            the start index in the span
+     * @param endIdx
+     *            the end index in the span
+     * @param placeUri
+     *            the URI of the place; not validated, so it may be {@code null}
+     */
+    public PlaceAdjectival(int startIdx, int endIdx, UriRef placeUri) {
+        this.startIdx = startIdx;
+        this.endIdx = endIdx;
+        this.placeUri = placeUri;
+    }
+
+    /**
+     * @return the URI of the place as passed to the constructor
+     */
+    public UriRef getPlaceUri() {
+        return placeUri;
+    }
+
+    /**
+     * @return the start index
+     */
+    public int getStart() {
+        return this.startIdx;
+    }
+
+    /**
+     * @return the end index
+     */
+    public int getEnd() {
+        return this.endIdx;
+    }
+
+    /**
+     * Computes the hash code from the start index, the end index and the place URI. A {@code null} place
+     * URI contributes 0 instead of causing a {@link NullPointerException}.
+     */
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + startIdx;
+        result = prime * result + endIdx;
+        result = prime * result + (placeUri == null ? 0 : placeUri.hashCode());
+
+        return result;
+    }
+
+    /**
+     * Two place adjectivals are equal if they have the same start index, end index and place URI (two
+     * {@code null} place URIs are considered equal).
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) return true;
+        if (obj == null) return false;
+        if (getClass() != obj.getClass()) return false;
+
+        PlaceAdjectival other = (PlaceAdjectival) obj;
+
+        if (this.startIdx != other.startIdx || this.endIdx != other.endIdx) return false;
+
+        return this.placeUri == null ? other.placeUri == null : this.placeUri.equals(other.placeUri);
+    }
+}

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/CoreferenceFinder.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/CoreferenceFinder.java?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/CoreferenceFinder.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/CoreferenceFinder.java
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,404 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.entitycoreference.impl;
+
+import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.COREF_ANNOTATION;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDFS_LABEL;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.engines.entitycoreference.Constants;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.NounPhrase;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.PlaceAdjectival;
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.coref.CorefFeature;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.entityhub.servicesapi.Entityhub;
+import org.apache.stanbol.entityhub.servicesapi.model.Entity;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
+import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
+import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
+import org.apache.stanbol.entityhub.servicesapi.site.Site;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteManager;
+import org.osgi.service.cm.ConfigurationException;
+
+/**
+ * Uses the list of NERs and the list of {@link NounPhrase}s found in the 
analyzed text to find possible
+ * co-references.
+ * 
+ * @author Cristian Petroaca
+ * 
+ */
+public class CoreferenceFinder {
+    /**
+     * The configured {@link SiteManager} for {@link Entity} storage.
+     */
+    private SiteManager siteManager;
+
+    /**
+     * The default {@link Entity} storage.
+     */
+    private Entityhub entityHub;
+
+    /**
+     * The name of the configured site for the {@link SiteManager}.
+     */
+    private String referencedSiteID;
+
+    /**
+     * In memory cache storing {@link Entity} types which are often used.
+     */
+    private InMemoryEntityTypeIndex entityTypeIndex;
+
+    /**
+     * Class holding configuration params.
+     */
+    private CoreferenceFinderConfig config;
+
+    /**
+     * Holds vocabulary.dictionary info such as the list of place adjectivals 
by language.
+     */
+    private Dictionaries dictionaries;
+
+    public CoreferenceFinder(String[] languages,
+                             SiteManager siteManager,
+                             Entityhub entityHub,
+                             String referencedSiteID,
+                             int maxDistance,
+                             String entityUriBase,
+                             String spatialAttrForPerson,
+                             String spatialAttrForOrg,
+                             String spatialAttrForPlace,
+                             String orgAttributesForPerson,
+                             String entityClassesToExclude) throws 
ConfigurationException {
+        this.siteManager = siteManager;
+        this.entityHub = entityHub;
+        this.referencedSiteID = referencedSiteID;
+        this.entityTypeIndex = new InMemoryEntityTypeIndex();
+        this.config = new CoreferenceFinderConfig(maxDistance, 
spatialAttrForPerson, 
+                       spatialAttrForOrg, spatialAttrForPlace, 
orgAttributesForPerson, entityClassesToExclude);
+        this.dictionaries = new Dictionaries(languages, entityUriBase);
+    }
+
+    /**
+     * Performs the actual coreference resolution by iterating through all the NERs and all the
+     * {@link NounPhrase}s which are after the given NER in the text. If any coreferences are found they
+     * are written as {@link NlpAnnotations#COREF_ANNOTATION} values in the NER and noun phrase
+     * {@link Span}s.
+     * <p>
+     * A noun phrase is only considered when it starts after the NER and lies in a later sentence. When
+     * the configured max distance is not {@link Constants#MAX_DISTANCE_NO_CONSTRAINT} the sentence
+     * distance must additionally not exceed it.
+     *
+     * @param ners
+     *            the NER spans keyed by the number of the sentence they appear in.
+     * @param nounPhrases
+     *            the noun phrases found in the text.
+     * @param language
+     *            the language of the text.
+     * @throws EngineException
+     *             if the configured referenced site is not available.
+     */
+    public void extractCorefs(Map<Integer,List<Span>> ners, List<NounPhrase> nounPhrases, String language) throws EngineException {
+        int maxDistance = this.config.getMaxDistance();
+        /*
+         * Previously the "no constraint" value made the distance check always fail, so no coreference
+         * was ever extracted. It now only lifts the upper bound on the sentence distance.
+         */
+        boolean unconstrained = maxDistance == Constants.MAX_DISTANCE_NO_CONSTRAINT;
+
+        for (Map.Entry<Integer,List<Span>> entry : ners.entrySet()) {
+            int nerSentenceNo = entry.getKey();
+
+            for (Span ner : entry.getValue()) {
+                // The entity and its type labels are looked up lazily, only once a candidate shows up.
+                Entity entity = null;
+                Set<String> typeLabels = null;
+                Set<Span> corefs = new HashSet<Span>();
+
+                for (NounPhrase nounPhrase : nounPhrases) {
+                    Span chunk = nounPhrase.getChunk();
+
+                    if (chunk.getStart() <= ner.getStart()) continue;
+
+                    int sentenceGap = nounPhrase.getSentenceNo() - nerSentenceNo;
+
+                    if (sentenceGap <= 0 || (!unconstrained && sentenceGap > maxDistance)) continue;
+
+                    if (entity == null) {
+                        entity = lookupEntity(ner, language);
+
+                        /*
+                         * If the entity is still null there's nothing to do but go to the next ner.
+                         */
+                        if (entity == null) break;
+
+                        typeLabels = buildEntityTypeLabels(entity, language);
+                    }
+
+                    if (isCoreferent(typeLabels, entity, ner, nounPhrase, language)) {
+                        Set<Span> coreferencedNer = new HashSet<Span>();
+                        coreferencedNer.add(ner);
+
+                        chunk.addAnnotation(COREF_ANNOTATION,
+                            Value.value(new CorefFeature(false, coreferencedNer)));
+                        corefs.add(chunk);
+                    }
+                }
+
+                if (!corefs.isEmpty()) {
+                    ner.addAnnotation(COREF_ANNOTATION, Value.value(new CorefFeature(true, corefs)));
+                }
+            }
+        }
+    }
+
+    /**
+     * Looks up the {@link Entity} matching a NER. The query is sent to the configured {@link Site}, or
+     * to the {@link Entityhub} when no site is configured, and constrains the label to the NER text and
+     * the type to the NER type.
+     *
+     * @param ner
+     *            the NER span whose text and NER annotation type are used for the query.
+     * @param language
+     *            the language used for the label constraint.
+     * @return the first matching entity or null if the query returned nothing.
+     * @throws EngineException
+     *             if the configured referenced site is not available.
+     */
+    private Entity lookupEntity(Span ner, String language) throws EngineException {
+        Site site = getReferencedSite();
+        boolean useEntityhub = site == null;
+
+        FieldQueryFactory queryFactory;
+        if (useEntityhub) {
+            queryFactory = entityHub.getQueryFactory();
+        } else {
+            queryFactory = site.getQueryFactory();
+        }
+
+        FieldQuery query = queryFactory.createFieldQuery();
+
+        Constraint labelConstraint = new TextConstraint(ner.getSpan(), false, language, null);
+        query.setConstraint(RDFS_LABEL.getUnicodeString(), labelConstraint);
+
+        String nerTypeUri = ner.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType()
+                .getUnicodeString();
+        query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(nerTypeUri));
+
+        // Only a single result is needed.
+        query.setLimit(1);
+
+        QueryResultList<Entity> results;
+        if (useEntityhub) {
+            results = entityHub.findEntities(query);
+        } else {
+            results = site.findEntities(query);
+        }
+
+        return results.isEmpty() ? null : results.iterator().next();
+    }
+
+    /**
+     * Performs the coreference matching rules: 1. Match the entity type. 2. If the {@link NounPhrase}
+     * contains any NERs match the NER to any spatial/org membership Entity properties from the
+     * {@link Site}. 3. If the {@link NounPhrase} contains any place adjectivals perform spatial
+     * co-reference based on the entity spatial properties. If neither 2 nor 3 confirms the match, a
+     * matched type label consisting of more than one word is accepted on its own.
+     *
+     * @param typeLabels
+     *            - a list of types (classes) that the given entity has.
+     * @param entity
+     *            - the entity for which we want to do the coref.
+     * @param ner
+     *            - the ner in the text for which we want to do the coref.
+     * @param nounPhrase
+     *            - the {@link NounPhrase} which we want to test for coref.
+     * @param language
+     *            - the language of the text.
+     * @return true if the noun phrase is considered a co-reference of the ner.
+     * @throws EngineException
+     *             if the configured referenced site is not available.
+     */
+    private boolean isCoreferent(Set<String> typeLabels,
+                                 Entity entity,
+                                 Span ner,
+                                 NounPhrase nounPhrase,
+                                 String language) throws EngineException {
+        /*
+         * 1. Try to match the entity class to the noun phrase. The label with the most words wins.
+         */
+        Span chunk = nounPhrase.getChunk();
+        String nounPhraseText = chunk.getSpan().toLowerCase();
+        String matchedClass = null;
+        int matchedWordCount = 0;
+        int classStart = 0;
+        int classEnd = 0;
+
+        for (String label : typeLabels) {
+            if (label == null || label.isEmpty()) continue;
+
+            /*
+             * The noun phrase text is lower-cased, so the label has to be lower-cased too. The label is
+             * quoted because it is data and may contain regex meta characters.
+             */
+            java.util.regex.Matcher matcher = java.util.regex.Pattern.compile(
+                "\\b" + java.util.regex.Pattern.quote(label.toLowerCase()) + "\\b").matcher(nounPhraseText);
+
+            if (!matcher.find()) continue;
+
+            int wordCount = label.split("\\s").length;
+
+            if (matchedClass == null || wordCount > matchedWordCount) {
+                matchedClass = label;
+                matchedWordCount = wordCount;
+                // Use the position of the word-bounded match, not of the first raw occurrence.
+                classStart = chunk.getStart() + matcher.start();
+                classEnd = chunk.getStart() + matcher.end();
+            }
+        }
+
+        if (matchedClass == null) return false;
+
+        UriRef nerType = ner.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType();
+
+        /*
+         * 2. See if there are any NERs in the noun phrase to further identify the coref. Any NERs found
+         * should be separate words from the class matches from point 1.
+         */
+        /*
+         * TODO - devise a coref confidence scheme?
+         */
+        if (nounPhrase.hasNers()) {
+            for (Span npNer : nounPhrase.getNerChunks()) {
+                /*
+                 * Skip NERs that touch or overlap the matched class text, including NERs that enclose it
+                 * completely.
+                 */
+                if (npNer.getStart() <= classEnd && npNer.getEnd() >= classStart) continue;
+
+                Entity npEntity = lookupEntity(npNer, language);
+
+                if (npEntity == null) continue;
+
+                UriRef npNerType = npNer.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType();
+                Set<String> rulesOntologyAttr = null;
+
+                if (OntologicalClasses.DBPEDIA_PLACE.equals(npNerType)) {
+                    rulesOntologyAttr = this.config.getSpatialAttributes(nerType);
+                } else if (OntologicalClasses.DBPEDIA_ORGANISATION.equals(npNerType)) {
+                    rulesOntologyAttr = this.config.getOrgMembershipAttributes(nerType);
+                }
+
+                // The config may hold no attributes for this ner type.
+                if (rulesOntologyAttr != null
+                    && valueExistsInEntityAttributes(rulesOntologyAttr, entity, npEntity.getId())) {
+                    return true;
+                }
+            }
+        }
+
+        /*
+         * 3. Detect any place adjectivals in noun phrases and use them for spatial coreference. Any place
+         * adjectivals found should be separate words from the class matches from point 1.
+         */
+        PlaceAdjectival placeAdjectival = this.dictionaries.findPlaceAdjectival(language, nounPhrase);
+
+        if (placeAdjectival != null
+            && (placeAdjectival.getEnd() < classStart || placeAdjectival.getStart() > classEnd)) {
+            /*
+             * We use the same spatial rules ontology attributes as before.
+             */
+            Set<String> rulesOntologyAttr = this.config.getSpatialAttributes(nerType);
+
+            if (rulesOntologyAttr != null
+                && valueExistsInEntityAttributes(rulesOntologyAttr, entity, placeAdjectival.getPlaceUri()
+                        .getUnicodeString())) {
+                return true;
+            }
+        }
+
+        /*
+         * If there was no additional info to do the coref and if the entity class matched and has more
+         * than 1 word then we consider this a good enough coreference.
+         */
+        return matchedWordCount > 1;
+    }
+
+    /**
+     * Collects the labels, in the given language, of all the types of an Entity. Types excluded by the
+     * configuration are skipped. Labels are taken from the in memory index when present, otherwise they
+     * are fetched from the configured {@link Site} (or the {@link Entityhub}) and added to the index.
+     *
+     * @param entity
+     *            the entity whose type URIs are resolved to labels.
+     * @param language
+     *            the language of the labels.
+     * @return the labels of all non excluded types; never null.
+     * @throws EngineException
+     *             if the configured referenced site is not available.
+     */
+    private Set<String> buildEntityTypeLabels(Entity entity, String language) throws EngineException {
+        Set<String> allTypeLabels = new HashSet<String>();
+
+        for (Iterator<Object> typeUris = entity.getRepresentation().get(RDF_TYPE.getUnicodeString()); typeUris
+                .hasNext();) {
+            String typeUri = typeUris.next().toString();
+
+            if (this.config.shouldExcludeClass(typeUri)) continue;
+
+            UriRef typeRef = new UriRef(typeUri);
+
+            // First try the in memory index
+            Set<String> labels = this.entityTypeIndex.lookupEntityType(typeRef, language);
+
+            if (labels == null) {
+                Site site = getReferencedSite();
+                Entity entityType;
+
+                if (site == null) {
+                    entityType = this.entityHub.getEntity(typeUri);
+                } else {
+                    entityType = site.getEntity(typeUri);
+                }
+
+                // Unknown type: nothing to add and nothing to remember.
+                if (entityType == null) continue;
+
+                labels = new HashSet<String>();
+                Iterator<Text> labelIterator = entityType.getRepresentation().get(
+                    RDFS_LABEL.getUnicodeString(), language);
+
+                while (labelIterator.hasNext()) {
+                    labels.add(labelIterator.next().getText());
+                }
+
+                this.entityTypeIndex.addEntityType(typeRef, language, labels);
+            }
+
+            allTypeLabels.addAll(labels);
+        }
+
+        return allTypeLabels;
+    }
+
+    /**
+     * Checks whether any of the attributes in rulesOntologyAttr from the given Entity contain the given
+     * value. Attribute values are compared through their string form.
+     *
+     * @param rulesOntologyAttr
+     *            the names of the entity attributes to inspect; may be null, e.g. when nothing is
+     *            configured for a type, in which case nothing matches.
+     * @param entity
+     *            the entity whose attributes are inspected.
+     * @param value
+     *            the value to look for; null never matches.
+     * @return true if at least one of the attributes holds the value.
+     */
+    private boolean valueExistsInEntityAttributes(Set<String> rulesOntologyAttr, Entity entity, String value) {
+        if (rulesOntologyAttr == null || value == null) return false;
+
+        for (String attribute : rulesOntologyAttr) {
+            Iterator<Object> entityAttributes = entity.getRepresentation().get(attribute);
+
+            while (entityAttributes.hasNext()) {
+                if (value.equals(entityAttributes.next().toString())) {
+                    return true;
+                }
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Resolves the configured {@link Site} through the {@link SiteManager}.
+     *
+     * @return the site, or null when no referenced site id is configured.
+     * @throws EngineException
+     *             if a site id is configured but the {@link SiteManager} does not return a site for it.
+     */
+    private Site getReferencedSite() throws EngineException {
+        // Without a site id the caller is expected to fall back to the Entityhub.
+        if (referencedSiteID == null) {
+            return null;
+        }
+
+        Site site = siteManager.getSite(referencedSiteID);
+
+        if (site != null) {
+            return site;
+        }
+
+        throw new EngineException(String.format(
+            "Unable to enhance because Referenced Site %s is currently not active!", referencedSiteID));
+    }
+}

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/CoreferenceFinderConfig.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/CoreferenceFinderConfig.java?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/CoreferenceFinderConfig.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/CoreferenceFinderConfig.java
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.entitycoreference.impl;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.UriRef;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.NounPhrase;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.osgi.service.cm.ConfigurationException;
+
+/**
+ * Contains configuration parameters for the {@link CoreferenceFinder}.
+ * 
+ * @author Cristian Petroaca
+ * 
+ */
+public class CoreferenceFinderConfig {
+    /**
+     * The maximum distance (in sentence numbers) between a NER and a {@link 
NounPhrase} for which we look for
+     * a coreference.
+     */
+    private int maxDistance;
+
+    /**
+     * The Uris for spatial properties for the NER to be inspected when doing 
the coref spatial match.
+     */
+    private Map<UriRef,Set<String>> spatialAttributes;
+    
+    /**
+     * The Uris for org membership properties for the NER to be inspected when 
doing the coref match.
+     */
+    private Map<UriRef,Set<String>> orgMembershipAttributes;
+
+    /**
+     * Entity classes which will not be used for coreference because they are 
too general.
+     */
+    private Set<String> entityClassesToExclude;
+
+    public CoreferenceFinderConfig(int maxDistance,
+                                                          String 
spatialAttrForPerson,
+                                                          String 
spatialAttrForOrg,
+                                                          String 
spatialAttrForPlace,
+                                                          String 
orgAttrForPerson,
+                                                          String 
entityClassesToExclude) throws ConfigurationException {
+       this.maxDistance = maxDistance;
+       
+       this.spatialAttributes = new HashMap<UriRef,Set<String>>();
+       this.orgMembershipAttributes = new HashMap<UriRef, Set<String>>();
+       
+        if (spatialAttrForPerson != null) {
+               Set<String> attributes = new HashSet<String>();
+               for (String attribute : spatialAttrForPerson.split(",")) {
+                   attributes.add(attribute);
+               }
+               this.spatialAttributes.put(OntologicalClasses.DBPEDIA_PERSON, 
attributes);
+        }
+        
+        if (spatialAttrForOrg != null) {
+               Set<String> attributes = new HashSet<String>();
+               for (String attribute : spatialAttrForOrg.split(",")) {
+                   attributes.add(attribute);
+               }
+               
this.spatialAttributes.put(OntologicalClasses.DBPEDIA_ORGANISATION, attributes);
+        }
+        
+        
+        if (spatialAttrForPlace != null) {
+               Set<String> attributes = new HashSet<String>();
+               for (String attribute : spatialAttrForPlace.split(",")) {
+                   attributes.add(attribute);
+               }
+               this.spatialAttributes.put(OntologicalClasses.DBPEDIA_PLACE, 
attributes);
+        }
+        
+        if (orgAttrForPerson != null) {
+               Set<String> attributes = new HashSet<String>();
+               for (String attribute : orgAttrForPerson.split(",")) {
+                   attributes.add(attribute);
+               }
+               
+               
this.orgMembershipAttributes.put(OntologicalClasses.DBPEDIA_PERSON, attributes);
+        }
+        
+        if (entityClassesToExclude != null) {
+            this.entityClassesToExclude = new HashSet<String>();
+
+            for (String clazz : entityClassesToExclude.split(",")) {
+                this.entityClassesToExclude.add(clazz);
+            }
+        }
+    }
+
+    /**
+     * Gets the max distance parameter.
+     * 
+     * @return
+     */
+    public int getMaxDistance() {
+        return maxDistance;
+    }
+
+    /**
+     * Gets the URIs for the spatial properties for a given Entity Type.
+     * 
+     * @param uri
+     *            of the Entity type for which we want to get the ontology.
+     * @return
+     */
+    public Set<String> getSpatialAttributes(UriRef uri) {
+        return this.spatialAttributes.get(uri);
+    }
+
+    /**
+     * Gets the URIs for the org membership properties for a given Entity Type.
+     * 
+     * @param uri
+     *            of the Entity type for which we want to get the ontology.
+     * @return
+     */
+    public Set<String> getOrgMembershipAttributes(UriRef uri) {
+        return this.orgMembershipAttributes.get(uri);
+    }
+    
+    /**
+     * Checks whether we should exclude the given class based on our config.
+     * 
+     * @param clazz
+     * @return
+     */
+    public boolean shouldExcludeClass(String clazz) {
+        return this.entityClassesToExclude.contains(clazz);
+    }
+}

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/Dictionaries.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/Dictionaries.java?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/Dictionaries.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/Dictionaries.java
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.entitycoreference.impl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.engines.entitycoreference.Constants;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.NounPhrase;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.PlaceAdjectival;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.osgi.service.cm.ConfigurationException;
+
+/**
+ * Contains information about several terms and properties of words we use in 
the {@link CoreferenceFinder}.
+ * 
+ * @author Cristian Petroaca
+ * 
+ */
+class Dictionaries {
+    /**
+     * Contains the place adjectivals in the form: language -> (adjectival -> UriRef). There are Places
+     * that have multiple adjectivals, so several adjectivals may point to the same UriRef; storing one
+     * entry per adjectival ensures a fast lookup.
+     */
+    private final Map<String,Map<String,UriRef>> placeAdjectivalsMap;
+
+    /**
+     * Loads the place adjectivals of every given language from the class path resource
+     * {@link Constants#PLACE_ADJECTIVALS_FOLDER}/&lt;language&gt;.
+     *
+     * @param languages
+     *            the languages to load.
+     * @param entityUriBase
+     *            the prefix prepended to a place name to build its {@link UriRef}.
+     * @throws ConfigurationException
+     *             if a resource is missing, cannot be read or contains a malformed line.
+     */
+    public Dictionaries(String[] languages, String entityUriBase) throws ConfigurationException {
+        placeAdjectivalsMap = new HashMap<>();
+
+        for (String language : languages) {
+            placeAdjectivalsMap.put(language, loadPlaceAdjectivals(language, entityUriBase));
+        }
+    }
+
+    /**
+     * Reads the dictionary of one language. Each line has the form
+     * {@code place<TAB>adjectival[,adjectival...]}; blank lines are ignored.
+     */
+    private static Map<String,UriRef> loadPlaceAdjectivals(String language, String entityUriBase) throws ConfigurationException {
+        String resource = Constants.PLACE_ADJECTIVALS_FOLDER + "/" + language;
+        InputStream langIn = Dictionaries.class.getResourceAsStream(resource);
+
+        // getResourceAsStream() returns null for a missing resource; report it instead of failing later.
+        if (langIn == null) {
+            throw new ConfigurationException("", "Could not find " + resource);
+        }
+
+        Map<String,UriRef> languagePlaceAdjMap = new HashMap<>();
+
+        /*
+         * Closing the reader also closes the wrapped stream. The charset is fixed so that the result
+         * does not depend on the platform default - assumes the resources are UTF-8 encoded, TODO
+         * confirm.
+         */
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(langIn,
+                java.nio.charset.StandardCharsets.UTF_8))) {
+            String line;
+
+            while ((line = reader.readLine()) != null) {
+                if (line.trim().isEmpty()) continue;
+
+                String[] splittedLine = line.split("\t");
+
+                if (splittedLine.length < 2) {
+                    throw new ConfigurationException("", "Malformed line '" + line + "' in " + resource);
+                }
+
+                UriRef ref = new UriRef(entityUriBase + splittedLine[0].trim());
+
+                for (String adjectival : splittedLine[1].split(",")) {
+                    languagePlaceAdjMap.put(adjectival.trim().toLowerCase(), ref);
+                }
+            }
+        } catch (IOException ioe) {
+            throw new ConfigurationException("", "Could not read " + resource, ioe);
+        }
+
+        return languagePlaceAdjMap;
+    }
+
+    /**
+     * Checks whether a {@link NounPhrase} contains a place adjectival and returns it.
+     *
+     * @param language
+     *            the language of the noun phrase.
+     * @param nounPhrase
+     *            the noun phrase whose tokens are inspected.
+     * @return the {@link PlaceAdjectival} if the {@link NounPhrase} contains one or null if not, or if
+     *         no dictionary was loaded for the language.
+     */
+    public PlaceAdjectival findPlaceAdjectival(String language, NounPhrase nounPhrase) {
+        Map<String,UriRef> langPlaceAdjectivalsMap = placeAdjectivalsMap.get(language);
+
+        if (langPlaceAdjectivalsMap == null) return null;
+
+        List<Span> tokens = nounPhrase.getTokens();
+        /*
+         * Go through all 1-grams and 2-grams and see if we have a match in the place adjectivals map.
+         * 2-grams should be good enough since there are no 3-gram places at least from what I saw. Each
+         * 2-gram is only tested once, together with its first token: testing it again from its second
+         * token could never find anything new.
+         */
+        for (int i = 0; i < tokens.size(); i++) {
+            Span currentToken = tokens.get(i);
+            String currentTokenString = currentToken.getSpan().toLowerCase();
+
+            // First the current 1-gram
+            UriRef placeUri = langPlaceAdjectivalsMap.get(currentTokenString);
+
+            if (placeUri != null) {
+                return new PlaceAdjectival(currentToken.getStart(), currentToken.getEnd(), placeUri);
+            }
+
+            // Then the 2-gram with the token after it
+            if (i < tokens.size() - 1) {
+                Span nextToken = tokens.get(i + 1);
+                String twoGram = currentTokenString + " " + nextToken.getSpan().toLowerCase();
+
+                placeUri = langPlaceAdjectivalsMap.get(twoGram);
+
+                if (placeUri != null) {
+                    return new PlaceAdjectival(currentToken.getStart(), nextToken.getEnd(), placeUri);
+                }
+            }
+        }
+
+        return null;
+    }
+}

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/InMemoryEntityTypeIndex.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/InMemoryEntityTypeIndex.java?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/InMemoryEntityTypeIndex.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/InMemoryEntityTypeIndex.java
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.entitycoreference.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.UriRef;
+
+/**
+ * Memory cache for storing often used Entity Type (Class) information.
+ * 
+ * @author Cristian Petroaca
+ * 
+ */
+public class InMemoryEntityTypeIndex {
+    /**
+     * Maps the URI of an entity class to its labels, grouped per language (language -> set of labels).
+     */
+    private Map<UriRef,Map<String,Set<String>>> index;
+
+    public InMemoryEntityTypeIndex() {
+        this.index = new HashMap<UriRef,Map<String,Set<String>>>();
+    }
+
+    /**
+     * Looks up the labels stored for a class URI in one language.
+     * 
+     * @param uri
+     *            the URI of the entity class
+     * @param language
+     *            the language key the labels were stored under
+     * @return the stored set of labels, or <code>null</code> if nothing is stored for the URI or for
+     *         that language
+     */
+    public Set<String> lookupEntityType(UriRef uri, String language) {
+        Map<String,Set<String>> labelsByLanguage = index.get(uri);
+        return labelsByLanguage == null ? null : labelsByLanguage.get(language);
+    }
+
+    /**
+     * Stores the labels of a class URI for one language, replacing any labels previously stored for
+     * the same URI and language.
+     * 
+     * @param uri
+     *            the URI of the entity class
+     * @param language
+     *            the language key to store the labels under
+     * @param labels
+     *            the labels of the class in that language
+     */
+    public void addEntityType(UriRef uri, String language, Set<String> labels) {
+        Map<String,Set<String>> labelsByLanguage = index.get(uri);
+
+        if (labelsByLanguage != null) {
+            labelsByLanguage.put(language, labels);
+            return;
+        }
+
+        // First label set for this URI: create its per-language map before registering it.
+        labelsByLanguage = new HashMap<String,Set<String>>();
+        labelsByLanguage.put(language, labels);
+        index.put(uri, labelsByLanguage);
+    }
+}

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/NounPhraseFilterer.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/NounPhraseFilterer.java?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/NounPhraseFilterer.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/java/org/apache/stanbol/enhancer/engines/entitycoreference/impl/NounPhraseFilterer.java
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.entitycoreference.impl;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.stanbol.enhancer.engines.entitycoreference.Constants;
+import 
org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.NounPhrase;
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
+import org.apache.stanbol.enhancer.nlp.pos.PosTag;
+import org.osgi.service.cm.ConfigurationException;
+
+/**
+ * Filters out bad {@link NounPhrase}s based on pos information.
+ * 
+ * @author Cristian Petroaca
+ * 
+ */
+/*
+ * TODO - create a NounPhraseFilterer interface with multiple implementations 
to separate languages with
+ * appositional definite article from the others.
+ */
+public class NounPhraseFilterer {
+    private final static String WITHIN_TEXT_DET_PROP = "within.text.referencing.determiners";
+    private final static short MIN_POS_NUMBER = 2;
+
+    /**
+     * Set of determiners, keyed by language, which make a {@link NounPhrase} valid for being a coref
+     * mention.
+     */
+    private Map<String,Set<String>> withinTextRefDeterminers;
+
+    /**
+     * Loads the determiner configuration of every given language from the classpath resource
+     * <code>Constants.POS_CONFIG_FOLDER + "/" + language + ".properties"</code>.
+     * 
+     * @param languages
+     *            the languages to load the configuration for
+     * @throws ConfigurationException
+     *             if a properties file is missing, cannot be read, or does not define the
+     *             <code>within.text.referencing.determiners</code> property
+     */
+    public NounPhraseFilterer(String[] languages) throws ConfigurationException {
+        withinTextRefDeterminers = new HashMap<String,Set<String>>();
+
+        for (String language : languages) {
+            String propertiesFile = Constants.POS_CONFIG_FOLDER + "/" + language + ".properties";
+            Properties props = loadProperties(propertiesFile);
+            String determinersProperty = props.getProperty(WITHIN_TEXT_DET_PROP);
+
+            if (determinersProperty == null) {
+                throw new ConfigurationException(WITHIN_TEXT_DET_PROP, "Missing property in "
+                                                                       + propertiesFile);
+            }
+
+            Set<String> langDeterminerSet = new HashSet<String>();
+            for (String determiner : determinersProperty.split(",")) {
+                // Tolerate "the, this" style lists: whitespace around an entry is not part of it.
+                String trimmed = determiner.trim();
+                if (trimmed.length() > 0) {
+                    langDeterminerSet.add(trimmed);
+                }
+            }
+
+            withinTextRefDeterminers.put(language, langDeterminerSet);
+        }
+    }
+
+    /**
+     * Reads a properties file from the classpath, relative to this class.
+     * 
+     * @param propertiesFile
+     *            the resource name of the properties file
+     * @return the loaded properties
+     * @throws ConfigurationException
+     *             if the resource does not exist or cannot be read
+     */
+    private static Properties loadProperties(String propertiesFile) throws ConfigurationException {
+        InputStream in = NounPhraseFilterer.class.getResourceAsStream(propertiesFile);
+
+        // getResourceAsStream signals a missing resource with null; Properties.load(null) would
+        // otherwise fail with an unchecked NullPointerException instead of a configuration error.
+        if (in == null) {
+            throw new ConfigurationException("", "Could not find " + propertiesFile);
+        }
+
+        Properties props = new Properties();
+        try {
+            props.load(in);
+        } catch (IOException e) {
+            throw new ConfigurationException("", "Could not read " + propertiesFile, e);
+        } finally {
+            try {
+                in.close();
+            } catch (IOException ignored) {
+                // Nothing useful can be done if closing a read-only classpath stream fails.
+            }
+        }
+
+        return props;
+    }
+
+    /**
+     * Removes from the given list every noun phrase which does not contain one of the determiners
+     * configured for the language, or which contains fewer than 2 noun/adjective tokens. Tokens
+     * without a POS annotation are ignored. For a language without configuration no determiner can
+     * match, so all noun phrases are removed. TODO : should this be configurable to be able to also
+     * include 1 word noun phrases?
+     * 
+     * @param nounPhrases
+     *            the noun phrases to filter; modified in place through its iterator
+     * @param language
+     *            the language whose configured determiners are used
+     */
+    public void filter(List<NounPhrase> nounPhrases, String language) {
+        // null when the language was not configured; guarded below instead of failing with an NPE.
+        Set<String> langDeterminerSet = withinTextRefDeterminers.get(language);
+        Iterator<NounPhrase> it = nounPhrases.iterator();
+
+        while (it.hasNext()) {
+            NounPhrase nounPhrase = it.next();
+            boolean hasGoodDeterminer = false;
+            short nounNo = 0;
+
+            for (Span token : nounPhrase.getTokens()) {
+                Value<PosTag> pos = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
+
+                if (pos != null) {
+                    PosTag posTag = pos.value();
+
+                    if (posTag.hasCategory(LexicalCategory.Noun)
+                        || posTag.hasCategory(LexicalCategory.Adjective)) {
+                        nounNo++;
+                    }
+
+                    if (!hasGoodDeterminer && langDeterminerSet != null
+                        && posTag.hasPos(Pos.Determiner)
+                        && langDeterminerSet.contains(token.getSpan().toLowerCase())) {
+                        hasGoodDeterminer = true;
+                    }
+                }
+            }
+
+            if (!hasGoodDeterminer || nounNo < MIN_POS_NUMBER) {
+                it.remove();
+            }
+        }
+    }
+
+    /**
+     * Tells whether a determiner configuration was loaded for the given language.
+     * 
+     * @param language
+     *            the language to check
+     * @return <code>true</code> if the language was passed to the constructor
+     */
+    public boolean supportsLanguage(String language) {
+        return withinTextRefDeterminers.containsKey(language);
+    }
+}

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/OSGI-INF/metatype/metatype.properties
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/OSGI-INF/metatype/metatype.properties
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+stanbol.enhancer.engine.name.name=Name
+stanbol.enhancer.engine.name.description=The name of the enhancement engine as 
\
+used in the RESTful interface '/engine/<name>'
+
+service.ranking.name=Ranking
+service.ranking.description=If two enhancement engines with the same name are 
active the \
+one with the higher ranking will be used to process parsed content items.
+
+
+#===============================================================================
+#Properties and Options used to configure the Entity Co-Reference Engine
+#===============================================================================
+org.apache.stanbol.enhancer.engines.entitycoreference.EntityCoReferenceEngine.name=Apache
 \
+Stanbol Enhancer Engine: Entity Co-Reference
+org.apache.stanbol.enhancer.engines.entitycoreference.EntityCoReferenceEngine.description=An
 Engine that finds \
+co-references of Named Entities based on dbpedia/yago concepts.
+
+enhancer.engine.entitycoreference.languages.name=Language configuration
+enhancer.engine.entitycoreference.languages.description=Takes a list of ISO \
+  language codes. '*' is the Wildcard; '!{lang}' to exclude a language
+enhancer.engine.entitycoreference.referencedSiteId.name=Referenced Site
+enhancer.engine.entitycoreference.referencedSiteId.description=The ID of the \
+Entityhub Referenced Site holding the Entity Index.
+enhancer.engine.entitycoreference.entity.uri.base.name=Entity URI base
+enhancer.engine.entitycoreference.entity.uri.base.description=The base uri 
which \
+is used to represent an Entity.
+enhancer.engine.entitycoreference.maxDistance.name=Max sentence distance
+enhancer.engine.entitycoreference.maxDistance.description=The maximum sentence 
distance between the Ner \
+and the noun phrase which mentions it. -1 means no distance constraint.
+
+enhancer.engine.entitycoreference.spatial.attr.person.name=Spatial Attributes 
for Person
+enhancer.engine.entitycoreference.spatial.attr.person.description=Attributes 
used for spatial \
+coreference when dealing with a person entity.
+enhancer.engine.entitycoreference.spatial.attr.org.name=Spatial Attributes for 
Organization
+enhancer.engine.entitycoreference.spatial.attr.org.description=Attributes used 
for spatial \
+coreference when dealing with an organization entity.
+enhancer.engine.entitycoreference.spatial.attr.place.name=Spatial Attributes 
for Place
+enhancer.engine.entitycoreference.spatial.attr.place.description=Attributes 
used for spatial \
+coreference when dealing with a place entity.
+enhancer.engine.entitycoreference.org.attr.person.name=Organisational 
Membership Attributes for Person
+enhancer.engine.entitycoreference.org.attr.person.description=Attributes used 
for organisational \
+membership coreference when dealing with a person entity.
+
+enhancer.engine.entitycoreference.entity.classes.excluded.name=Entity classes 
to be excluded
+enhancer.engine.entitycoreference.entity.classes.excluded.description=Entity 
classes which will \
+be excluded when doing the entity class type matching because they are too 
general in nature.
\ No newline at end of file

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/config/pos/en.properties
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/config/pos/en.properties?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/config/pos/en.properties
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/config/pos/en.properties
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,2 @@
+# Determiners of a noun phrase which determine that the noun phrase is a good 
candidate for coref.
+within.text.referencing.determiners=the,this,these
\ No newline at end of file

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/data/place_adjectivals/en
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/data/place_adjectivals/en?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/data/place_adjectivals/en
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/main/resources/data/place_adjectivals/en
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,236 @@
+Abkhazia       Abkhaz, Abkhazian
+Afghanistan    Afghan
+Albania        Albanian
+Algeria        Algerian
+American_Samoa American Samoan
+Andorra        Andorran
+Angola Angolan
+Anguilla       Anguillan
+Antigua_and_Barbuda    Antiguan, Barbudan
+Argentina      Argentine, Argentinean, Argentinian
+Armenia        Armenian
+Aruba  Aruban
+Australia      Australian
+Austria        Austrian
+Azerbaijan     Azerbaijani, Azeri
+Bahamas        Bahamian
+Bahrain        Bahraini
+Bangladesh     Bangladeshi
+Barbados       Barbadian
+Belarus        Belarusian
+Belgium        Belgian
+Belize Belizean
+Benin  Beninese, Beninois
+Bermuda        Bermudian, Bermudan
+Bhutan Bhutanese
+Bolivia        Bolivian
+Bosnia_and_Herzegovina Bosnian, Bosniak, Herzegovinian
+Botswana       Motswana, Botswanan
+Brazil Brazilian
+British_Virgin_Islands British Virgin Island
+Brunei Bruneian
+Bulgaria       Bulgarian
+Burkina_Faso   Burkinabè
+Burma  Burmese
+Burundi        Burundian
+Cambodia       Cambodian
+Cameroon       Cameroonian
+Canada Canadian
+Cape_Verde     Cape Verdean
+Cayman_Islands Caymanian
+Central_African_Republic       Central African
+Chad   Chadian
+Chile  Chilean
+China  Chinese
+Christmas_Island       Christmas Island
+Cocos_Islands  Cocos Island
+Colombia       Colombian
+Comoros        Comorian
+Congo  Congolese, Congo        
+Cook_Islands   Cook Island, Cook Islands
+Costa_Rica     Costa Rican
+Côte_d'Ivoire  Ivorian
+Croatia        Croatian
+Cuba   Cuban
+Cyprus Cypriot
+Czech_Republic Czech
+Denmark        Danish
+Djibouti       Djiboutian
+Dominica       Dominican
+Dominican_Republic     Dominican
+East_Timor     Timorese
+Ecuador        Ecuadorian
+Egypt  Egyptian
+El_Salvador    Salvadoran
+England        English
+Equatorial_Guinea      Equatorial Guinean, Equatoguinean
+Eritrea        Eritrean
+Estonia        Estonian
+Ethiopia       Ethiopian
+Falkland_Islands       Falkland Island
+Faroe_Islands  Faroese
+Fiji   Fijian
+Finland        Finnish
+France French
+French_Guiana  French Guianese
+French_Polynesia       French Polynesian
+Gabon  Gabonese
+Gambia Gambian
+Georgia        Georgian
+Germany        German
+Ghana  Ghanaian
+Gibraltar      Gibraltar
+Great_Britain  British
+Greece Greek, Grecian, Hellenic
+Greenland      Greenlandic
+Grenada        Grenadian
+Guadeloupe     Guadeloupe
+Guam   Guamanian, Guambat
+Guatemala      Guatemalan
+Guinea Guinean 
+Guyana Guyanese
+Haiti  Haitian
+Honduras       Honduran
+Hong_Kong      Hong Kong, Hongkongese
+Hungary        Hungarian, Magyar
+Iceland        Icelandic
+India  Indian
+Indonesia      Indonesian
+Iran   Iranian, Persian
+Iraq   Iraqi
+Ireland        Irish
+Isle_of_Man    Manx
+Israel Israeli
+Italy  Italian, Italic
+Jamaica        Jamaican
+Japan  Japanese
+Jordan Jordanian
+Kazakhstan     Kazakh, Kazakhstani
+Kenya  Kenyan
+Kiribati       I-Kiribati
+North_Korea    North Korean
+South_Korea    South Korean
+Kosovo Kosovar, Kosovan
+Kuwait Kuwaiti
+Kyrgyzstan     Kyrgyzstani, Kyrgyz, Kirgiz, Kirghiz
+Laos   Laotian, Lao
+Latvia Latvian
+Lebanon        Lebanese
+Lesotho        Basotho
+Liberia        Liberian
+Libya  Libyan
+Liechtenstein  Liechtenstein
+Lithuania      Lithuanian
+Luxembourg     Luxembourg, Luxembourgish
+Macau  Macanese, Chinese
+Macedonia      Macedonian
+Madagascar     Malagasy
+Malawi Malawian
+Malaysia       Malaysian
+Maldives       Maldivian
+Mali   Malian
+Malta  Maltese
+Marshall_Islands       Marshallese
+Martinique     Martiniquais, Martinican
+Mauritania     Mauritanian
+Mauritius      Mauritian
+Mayotte        Mahoran
+Mexico Mexican
+Micronesia     Micronesian
+Moldova        Moldovan
+Monaco Monégasque, Monacan
+Mongolia       Mongolian
+Montenegro     Montenegrin
+Montserrat     Montserratian
+Morocco        Moroccan
+Mozambique     Mozambican
+Namibia        Namibian
+Nauru  Nauruan
+Nepal  Nepalese, Nepali
+Netherlands    Dutch, Netherlandic
+New_Caledonia  New Caledonian
+New_Zealand    New Zealand, NZ
+Nicaragua      Nicaraguan
+Niue   Niuean
+Niger  Nigerien
+Nigeria        Nigerian
+Norway Norwegian
+Northern_Ireland       Northern Irish, Irish
+Northern_Marianas      Northern Marianan
+Oman   Omani
+Pakistan       Pakistani
+Palestine      Palestinian
+Palau  Palauan
+Panama Panamanian
+Papua_New_Guinea       Papua New Guinean, Papuan
+Paraguay       Paraguayan
+Peru   Peruvian
+Philippines    Philippine, Filipino
+Pitcairn_Island        Pitcairn Island
+Poland Polish
+Portugal       Portuguese
+Puerto_Rico    Puerto Rican
+Qatar  Qatari
+Republic_of_Ireland    Irish
+Réunion        Réunionese, Réunionnais
+Romania        Romanian
+Russia Russian
+Rwanda Rwandan
+St._Helena     St. Helenian
+St._Kitts_and_Nevis    Kittitian, Nevisian
+St._Lucia      St. Lucian
+Saint-Pierre_and_Miquelon      Saint-Pierrais, Miquelonnais
+St._Vincent_and_the_Grenadines St. Vincentian, Vincentian
+Samoa  Samoan
+San_Marino     Sammarinese
+São_Tomé_and_Príncipe  São Toméan
+Saudi_Arabia   Saudi, Saudi Arabian
+Scotland       Scots, Scottish, Scotch
+Senegal        Senegalese
+Serbia Serbian
+Seychelles     Seychellois
+Sierra_Leone   Sierra Leonean
+Singapore      Singaporean
+Slovakia       Slovak
+Slovenia       Slovenian, Slovene
+Solomon_Islands        Solomon Island
+Somalia        Somali, Somalian
+South_Africa   South African
+South_Ossetia  South Ossetian
+South_Sudan    South Sudanese
+Spain  Spanish
+Sri_Lanka      Sri Lankan
+Sudan  Sudanese
+Surinam        Surinamese
+Swaziland      Swazi
+Sweden Swedish
+Switzerland    Swiss
+Syria  Syrian
+Taiwan Taiwanese
+Tajikistan     Tajikistani
+Tanzania       Tanzanian
+Thailand       Thai
+Togo   Togolese
+Tonga  Tongan
+Trinidad_and_Tobago    Trinidadian, Tobagonian
+Tunisia        Tunisian
+Turkey Turkish
+Turkmenistan   Turkmen
+Tuvalu Tuvaluan
+Uganda Ugandan
+Ukraine        Ukrainian
+United_Arab_Emirates   Emirati, Emirian
+United_Kingdom British, UK
+United_States  American, US
+Uruguay        Uruguayan
+Uzbekistan     Uzbekistani, Uzbek
+Vanuatu        Ni-Vanuatu, Vanuatuan
+Venezuela      Venezuelan
+Vietnam        Vietnamese
+Virgin_Islands Virgin Island
+Wales  Welsh
+Wallis_and_Futuna      Wallisian, Futunan
+Western_Sahara Sahrawi, Sahrawian, Sahraouian
+Yemen  Yemeni
+Zambia Zambian
+Zimbabwe       Zimbabwean
\ No newline at end of file

Added: 
stanbol/trunk/enhancement-engines/entitycoreference/src/test/resources/log4j.properties
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycoreference/src/test/resources/log4j.properties?rev=1692320&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/entitycoreference/src/test/resources/log4j.properties
 (added)
+++ 
stanbol/trunk/enhancement-engines/entitycoreference/src/test/resources/log4j.properties
 Wed Jul 22 18:58:38 2015
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Root logger option
+log4j.rootLogger=INFO, stdout
+ 
+# Direct log messages to stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
+log4j.logger.org.apache.stanbol.enhancer.engines.entitycoreference=DEBUG
\ No newline at end of file

Modified: stanbol/trunk/enhancement-engines/pom.xml
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/pom.xml?rev=1692320&r1=1692319&r2=1692320&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/pom.xml (original)
+++ stanbol/trunk/enhancement-engines/pom.xml Wed Jul 22 18:58:38 2015
@@ -114,9 +114,8 @@
     <module>geonames</module> <!-- http://geonames.org -->
     <module>opencalais</module> <!-- http://opencalais.com/ -->
     <module>zemanta</module> <!-- htt://zemanta.com -->
-       
-    <!-- The entity co-refernece engine is not yet in trunk -->
-         <!-- module>entitycoreference</module -->
+
+       <module>entitycoreference</module>
   </modules>
 
   <build>

svn commit: r1692320 [2/2] - in /stanbol/trunk: data/ data/sites/entity-coref-dbpedia/ data/sites/entity-coref-dbpedia/dbpedia_yago_classes/ data/sites/entity-coref-dbpedia/src/ data/sites/entity-coref-dbpedia/src/main/ data/sites/entity-coref-dbpedia/...

Reply via email to