Hi Pablo,

I'm using confidence=0.2 and support=20 for all experiments.

I've changed the spotter dictionary to 
surface_forms-Wikipedia-TitRedDis.thresh3.spotterDictionary from the 
release-0.4 directory, but still no luck.

Here's my result:
<a href="http://dbpedia.org/resource/Presidency_of_Barack_Obama"; 
title="http://dbpedia.org/resource/Presidency_of_Barack_Obama"; 
target="_blank">President Obama</a> called Wednesday on <a 
href="http://dbpedia.org/resource/United_States_Congress"; 
title="http://dbpedia.org/resource/United_States_Congress"; 
target="_blank">Congress</a> to extend a <a 
href="http://dbpedia.org/resource/Tax_break"; 
title="http://dbpedia.org/resource/Tax_break"; target="_blank">tax break</a> for 
<a href="http://dbpedia.org/resource/Student"; 
title="http://dbpedia.org/resource/Student"; target="_blank">students</a> 
included in last year's <a 
href="http://dbpedia.org/resource/Economic_Stimulus_Act_of_2008"; 
title="http://dbpedia.org/resource/Economic_Stimulus_Act_of_2008"; 
target="_blank">economic stimulus</a> package, arguing that the <a 
href="http://dbpedia.org/resource/Policy"; 
title="http://dbpedia.org/resource/Policy"; target="_blank">policy</a> provides 
more generous <a href="http://dbpedia.org/resource/American_Student_Assistance"; 
title="http://dbpedia.org/resource/American_Student_Assistance"; 
target="_blank">assistance</a>.

And the one from http://spotlight.dbpedia.org:
<a href="http://dbpedia.org/resource/Presidency_of_Barack_Obama"; 
title="http://dbpedia.org/resource/Presidency_of_Barack_Obama"; 
target="_blank">President Obama</a> called <a 
href="http://dbpedia.org/resource/Sheffield_Wednesday_F.C."; 
title="http://dbpedia.org/resource/Sheffield_Wednesday_F.C."; 
target="_blank">Wednesday</a> on <a 
href="http://dbpedia.org/resource/United_States_Congress"; 
title="http://dbpedia.org/resource/United_States_Congress"; 
target="_blank">Congress</a> to extend a tax break for <a 
href="http://dbpedia.org/resource/Student"; 
title="http://dbpedia.org/resource/Student"; target="_blank">students</a> 
included in last <a href="http://dbpedia.org/resource/University"; 
title="http://dbpedia.org/resource/University"; target="_blank">year</a>'s 
economic stimulus <a href="http://dbpedia.org/resource/Packaging_and_labeling"; 
title="http://dbpedia.org/resource/Packaging_and_labeling"; 
target="_blank">package</a>, arguing that the <a 
href="http://dbpedia.org/resource/Policy"; 
title="http://dbpedia.org/resource/Policy"; target="_blank">policy</a> provides 
more generous <a href="http://dbpedia.org/resource/Assistance_dog"; 
title="http://dbpedia.org/resource/Assistance_dog"; 
target="_blank">assistance</a>.

Given the fact that my local result is more 
'correct' in terms of the annotations, I am wondering if I am employing a 
different (better) set of data and/or processes. The other thing which makes me 
suspect that I am using more accurate, but costly, processes is that 
spotlight.dbpedia.org is about twice as fast as my local implementation, 
despite the fact I'm running it on a dedicated server with 8 cores and 32GB of 
RAM.

N

Below is my full server.properties file. Can you indicate any deviations from 
yours?



# Server hostname and port to be used by DBpedia Spotlight REST API
org.dbpedia.spotlight.web.rest.uri = http://localhost:2222/rest

# Internationalization (i18n) support -- work in progress
org.dbpedia.spotlight.default_namespace = http://dbpedia.org/resource/
org.dbpedia.spotlight.default_ontology= http://dbpedia.org/ontology/
# Defines the languages the system should support.
org.dbpedia.spotlight.language = English
org.dbpedia.spotlight.language_i18n_code = en
# Stop word list
# An example can be downloaded from: http://spotlight.dbpedia.org/download/release-0.4/stopwords.en.list
org.dbpedia.spotlight.data.stopWords.english = data/stopwords.en.list
org.dbpedia.spotlight.data.stopWords.portuguese = data/stopwords.pt.list

#----- SPOTTING -------

# Comma-separated list of spotters to load.
# Accepted values are LingPipeSpotter,WikiMarkupSpotter,AtLeastOneNounSelector,CoOccurrenceBasedSelector,NESpotter,OpenNLPNGramSpotter,OpenNLPChunkerSpotter,KeaSpotter
# Some spotters may require extra files and config parameters. See org.dbpedia.spotlight.model.SpotterConfiguration
org.dbpedia.spotlight.spot.spotters = LingPipeSpotter,WikiMarkupSpotter
org.dbpedia.spotlight.spot.selectors = ShortSurfaceFormSelector

# Path to serialized LingPipe dictionary used by LingPipeSpotter
org.dbpedia.spotlight.spot.dictionary = data/surface_forms-Wikipedia-TitRedDis.thresh3.spotterDictionary
org.dbpedia.spotlight.spot.allowOverlap = false
org.dbpedia.spotlight.spot.caseSensitive = false

# Configurations for the CoOccurrenceBasedSelector
# From: http://spotlight.dbpedia.org/download/release-0.5/spot_selector.tgz
org.dbpedia.spotlight.spot.cooccurrence.datasource = ukwac
org.dbpedia.spotlight.spot.cooccurrence.database.jdbcdriver = org.hsqldb.jdbcDriver
org.dbpedia.spotlight.spot.cooccurrence.database.connector = jdbc:hsqldb:file:data/spotsel/ukwac_candidate;shutdown=true&readonly=true
org.dbpedia.spotlight.spot.cooccurrence.database.user = sa
org.dbpedia.spotlight.spot.cooccurrence.database.password =
org.dbpedia.spotlight.spot.cooccurrence.classifier.unigram = data/spotsel/ukwac_unigram.model
org.dbpedia.spotlight.spot.cooccurrence.classifier.ngram = data/spotsel/ukwac_ngram.model

# Path to serialized HMM model for LingPipe-based POS tagging. Required by AtLeastOneNounSelector and CoOccurrenceBasedSelector
org.dbpedia.spotlight.tagging.hmm = data/pos-en-general-brown.HiddenMarkovModel

# Path to dir containing several OpenNLP models for NER, chunking, etc. This is required for spotters that are based on OpenNLP.
# Can be downloaded from http://spotlight.dbpedia.org/download/release-0.5/opennlp_models.tgz
org.dbpedia.spotlight.spot.opennlp.dir = data/opennlp
org.dbpedia.spotlight.spot.opennlp.person= http://dbpedia.org/ontology/Person
org.dbpedia.spotlight.spot.opennlp.organization=http://dbpedia.org/ontology/Organisation
org.dbpedia.spotlight.spot.opennlp.location=http://dbpedia.org/ontology/Place


# EXPERIMENTAL! Path to Kea Model
org.dbpedia.spotlight.spot.kea.model = data/kea/keaModel-1-3-1

#EXPERIMENTAL! AhoCorasick Spotter
org.dbpedia.spotlight.spot.ahocorasick.surfaceforms=data/surfaceforms.set


#----- CANDIDATE SELECTION -------

# Choose between jdbc or lucene for DBpedia Resource creation. Also, if the jdbc throws an error, lucene will be used.
org.dbpedia.spotlight.core.database = lucene
org.dbpedia.spotlight.core.database.jdbcdriver = org.hsqldb.jdbcDriver
org.dbpedia.spotlight.core.database.connector = jdbc:hsqldb:file:data/database/spotlight-db;shutdown=true&readonly=true
org.dbpedia.spotlight.core.database.user = sa
org.dbpedia.spotlight.core.database.password =

# From http://spotlight.dbpedia.org/download/release-0.5/candidate-index-full.tgz
org.dbpedia.spotlight.candidateMap.dir = data/candidateIndexTitRedDis
org.dbpedia.spotlight.candidateMap.loadToMemory = true
# Path to Lucene index containing only the candidate map. It is used by document-oriented disambiguators such as Document,TwoStepDisambiguator
# Only used if one such disambiguator is loaded. Data is at: http://spotlight.dbpedia.org/download/release-0.5/candidate-index-full.tgz
#org.dbpedia.spotlight.candidateMap.dir = dist/src/deb/control/data/usr/share/dbpedia-spotlight/index


#----- DISAMBIGUATION -------

# List of disambiguators to load: Document,Occurrences,CuttingEdge,Default
org.dbpedia.spotlight.disambiguate.disambiguators = Default,Document

# Path to a directory containing Lucene index files. These can be downloaded from the website or created by org.dbpedia.spotlight.lucene.index.IndexMergedOccurrences
org.dbpedia.spotlight.index.dir = data/index-withSF-withTypes-compressed
# Will attempt to load into RAM (the potentially huge) index from "org.dbpedia.spotlight.index.dir"
org.dbpedia.spotlight.index.loadToMemory = true
# Class used to process context around DBpedia mentions (tokenize, stem, etc.)
org.dbpedia.spotlight.lucene.analyzer = org.apache.lucene.analysis.en.EnglishAnalyzer
org.dbpedia.spotlight.lucene.version = LUCENE_36
# How large can the cache be for ICFDisambiguator.
jcs.default.cacheattributes.MaxObjects = 15000


#----- LINKING / FILTERING  -------

# Configuration for SparqlFilter
org.dbpedia.spotlight.sparql.endpoint = http://dbpedia.org/sparql
org.dbpedia.spotlight.sparql.graph = http://dbpedia.org



                                          
------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_d2d_feb
_______________________________________________
Dbp-spotlight-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/dbp-spotlight-users

Reply via email to