http://www.mediawiki.org/wiki/Special:Code/MediaWiki/67027
Revision: 67027 Author: daniel Date: 2010-05-28 20:05:51 +0000 (Fri, 28 May 2010) Log Message: ----------- track disambig property vectors Modified Paths: -------------- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java 2010-05-28 19:44:06 UTC (rev 67026) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java 2010-05-28 20:05:51 UTC (rev 67027) @@ -32,20 +32,32 @@ public static class CoherenceDisambiguation<T extends TermReference, C extends LocalConcept> extends Disambiguator.Disambiguation<T, C> { protected LabeledVector<Integer> centroid; + protected Map<Integer, ConceptFeatures<C, Integer>> features; - public CoherenceDisambiguation(Interpretation<T, C> interpretation, LabeledVector<Integer> centroid, double score, String description) { + public CoherenceDisambiguation(Interpretation<T, C> interpretation, Map<Integer, ConceptFeatures<C, Integer>> features, LabeledVector<Integer> centroid, double score, String description) { super(interpretation, score, description); this.centroid = centroid; + this.features = features; } - public CoherenceDisambiguation(Map<T, C> meanings, List<T> sequence, LabeledVector<Integer> centroid, double score, String description) { + public CoherenceDisambiguation(Map<T, C> meanings, List<T> sequence, Map<Integer, ConceptFeatures<C, Integer>> features, LabeledVector<Integer> centroid, double score, String description) { super(meanings, sequence, score, description); this.centroid = centroid; + this.features = features; } public LabeledVector<Integer> getCentroid() { return centroid; } + + public Map<Integer, ConceptFeatures<C, Integer>> getFeatures() { + return features; + } + + public ConceptFeatures<C, Integer> getFeature(int concept) { + return getFeatures().get(concept); + } + } protected int minPopularity = 2; //FIXME: use complex cutoff specifier! @@ -217,7 +229,7 @@ * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List) */ public <X extends TermReference>CoherenceDisambiguation<X, LocalConcept> disambiguate(PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) throws PersistenceException { - if (meanings.isEmpty()) return new CoherenceDisambiguation<X, LocalConcept>(Collections.<X, LocalConcept>emptyMap(), Collections.<X>emptyList(), new MapLabeledVector<Integer>(), 0.0, "no terms or meanings"); + if (meanings.isEmpty()) return new CoherenceDisambiguation<X, LocalConcept>(Collections.<X, LocalConcept>emptyMap(), Collections.<X>emptyList(), Collections.<Integer, ConceptFeatures<LocalConcept, Integer>>emptyMap(), new MapLabeledVector<Integer>(), 0.0, "no terms or meanings"); LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true); FeatureFetcher<LocalConcept, Integer> features = getFeatureCache(meanings, context); @@ -384,11 +396,12 @@ int c = interp.getSequence().size(); if (c == 0) { - CoherenceDisambiguation<X, LocalConcept> r = new CoherenceDisambiguation<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), new MapLabeledVector<Integer>(), 0, "empty"); + CoherenceDisambiguation<X, LocalConcept> r = new CoherenceDisambiguation<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), Collections.<Integer, ConceptFeatures<LocalConcept, Integer>>emptyMap(), new MapLabeledVector<Integer>(), 0, "empty"); return r; } LabeledVector<Integer> sum = new MapLabeledVector<Integer>(); + Map<Integer, ConceptFeatures<LocalConcept, Integer>> disambigFeatures = new HashMap<Integer, ConceptFeatures<LocalConcept, Integer>>(); double sim = 0, pop = 0, weight = 0; int i=0, j=0; for (Map.Entry<? extends TermReference, LocalConcept> ea: concepts.entrySet()) { @@ -399,6 +412,7 @@ if (a==null) continue; ConceptFeatures<LocalConcept, Integer> fa = features.getFeatures(a); + disambigFeatures.put(a.getId(), fa); sum.add(fa.getFeatureVector()); j=0; @@ -483,7 +497,7 @@ double score = scoreCombiner.apply(simf, popf); if (score<0 || score>1) throw new SanityException("encountered insane score ("+score+"); check scoreCombiner!"); - CoherenceDisambiguation<X, LocalConcept> r = new CoherenceDisambiguation<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), centroid, score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop+", weight="+weight); + CoherenceDisambiguation<X, LocalConcept> r = new CoherenceDisambiguation<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), disambigFeatures, centroid, score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop+", weight="+weight); return r; } Modified: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java =================================================================== --- trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java 2010-05-28 19:44:06 UTC (rev 67026) +++ trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java 2010-05-28 20:05:51 UTC (rev 67027) @@ -11,12 +11,10 @@ import de.brightbyte.data.LabeledVector; import de.brightbyte.data.MapLabeledMatrix; import de.brightbyte.data.MapLabeledVector; -import de.brightbyte.data.measure.CosineVectorSimilarity; import de.brightbyte.data.measure.Measure; -import de.brightbyte.data.measure.ScalarVectorSimilarity; import de.brightbyte.data.measure.Similarity; import de.brightbyte.util.PersistenceException; -import de.brightbyte.wikiword.disambig.Disambiguator.Disambiguation; +import de.brightbyte.wikiword.model.ConceptFeatures; import de.brightbyte.wikiword.model.LocalConcept; import de.brightbyte.wikiword.model.PhraseNode; import de.brightbyte.wikiword.model.TermReference; @@ -74,7 +72,7 @@ * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List) */ public <X extends TermReference>CoherenceDisambiguation<X, LocalConcept> disambiguate(PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) throws PersistenceException { - if (meanings.isEmpty()) return new CoherenceDisambiguation<X, LocalConcept>(Collections.<X, LocalConcept>emptyMap(), Collections.<X>emptyList(), new MapLabeledVector<Integer>(), 0.0, "no terms or meanings"); + if (meanings.isEmpty()) return new CoherenceDisambiguation<X, LocalConcept>(Collections.<X, LocalConcept>emptyMap(), Collections.<X>emptyList(), Collections.<Integer, ConceptFeatures<LocalConcept, Integer>>emptyMap(), new MapLabeledVector<Integer>(), 0.0, "no terms or meanings"); int sz = meanings.size(); if (context!=null) sz += context.size(); _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs