Revision: 17459
http://sourceforge.net/p/gate/code/17459
Author: adamfunk
Date: 2014-02-26 21:09:57 +0000 (Wed, 26 Feb 2014)
Log Message:
-----------
Alas, what to do with the raw scores?
Modified Paths:
--------------
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/PairCsvGenerator.java
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java	2014-02-26 20:51:10 UTC (rev 17458)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java	2014-02-26 21:09:57 UTC (rev 17459)
@@ -40,7 +40,7 @@
return this.documentCount;
}
- public abstract void saveAsCsv(double threshold, File file)
+ public abstract void saveAsCsv(Number threshold, File file)
throws GateException;
public abstract void saveAsCsv(File file)
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java	2014-02-26 20:51:10 UTC (rev 17458)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java	2014-02-26 21:09:57 UTC (rev 17459)
@@ -181,7 +181,7 @@
/* Methods for saving as CSV */
- public void saveAsCsv(double threshold, File outputFile) throws
GateException {
+ public void saveAsCsv(Number threshold, File outputFile) throws
GateException {
PairCsvGenerator generator = new PairCsvGenerator();
generator.generateAndSaveCsv(this, threshold, outputFile);
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java	2014-02-26 20:51:10 UTC (rev 17458)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java	2014-02-26 21:09:57 UTC (rev 17459)
@@ -77,16 +77,21 @@
return this.scoreTypes;
}
- // TODO : make this abstract and implement it
- // everywhere as part of the overhaul
+
public Number getScore(ScoreType type, Term term) {
- return 0.0;
+ Map<Term, Number> mainScores = this.getScores();
+ if (mainScores.containsKey(term)) {
+ return mainScores.get(term);
+ }
+
+ // implied else
+ return 0;
}
- // TODO : make this abstract and implement it
- // everywhere as part of the overhaul
+
public Collection<Term> getTerms() {
- return new HashSet<Term>();
+ Map<Term, Number> mainScores = this.getScores();
+ return mainScores.keySet();
}
@@ -94,11 +99,12 @@
return this.scoreTypes.get(0);
}
+
protected abstract void initializeScoreTypes();
public List<Term> getTermsByDescendingScore() {
- // lazy calculation
+ // lazy computation
if (! descendingScoresDone) {
termsByDescendingScore = new ArrayList<Term>(this.getTerms());
Collections.sort(termsByDescendingScore, new
TermComparatorByDescendingScore(scores.get(this.getDefaultScoreType())));
@@ -108,10 +114,12 @@
}
+ @Deprecated
public Map<Term, Double> getTermScores() {
return this.termScores;
}
+
public Map<Term, Set<String>> getTermDocuments() {
return this.termDocuments;
}
@@ -126,20 +134,28 @@
return new HashSet<String>();
}
-
+ @Deprecated
public Map<Term, Integer> getTermFrequencies() {
return this.termFrequencies;
}
+ @Deprecated
public Map<Term, Integer> getDocFrequencies() {
return this.docFrequencies;
}
+ @Deprecated
public String getFreqProperty() {
return freqProperty;
}
+
+
+ public Map<Term, Number> getScores() {
+ return this.scores.get(this.getDefaultScoreType());
+ }
- public Double getMinScore() {
+
+ public Number getMinScore() {
if (this.termScores.isEmpty()) {
return 1.0;
}
@@ -147,7 +163,7 @@
return Collections.min(this.termScores.values());
}
- public Double getMaxScore() {
+ public Number getMaxScore() {
if (this.termScores.isEmpty()) {
return 1.0;
}
@@ -215,14 +231,14 @@
-
+ @Deprecated
protected int incrementTermFreq(Term term, int increment) {
return Utilities.incrementMap(termFrequencies, term, increment);
}
-
+ // TODO: change to use getMainScores()
public Double getScore(Term term) {
if (termScores.containsKey(term)) {
return termScores.get(term).doubleValue();
@@ -233,6 +249,7 @@
}
+ @Deprecated
public Double getRawScore(Term term) {
if (rawTermScores.containsKey(term)) {
return rawTermScores.get(term).doubleValue();
@@ -246,7 +263,7 @@
/* Methods for saving as CSV */
- public void saveAsCsv(double threshold, File outputFile) throws
GateException {
+ public void saveAsCsv(Number threshold, File outputFile) throws
GateException {
CsvGenerator.generateAndSaveCsv(this, threshold, outputFile);
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-02-26 20:51:10 UTC (rev 17458)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-02-26 21:09:57 UTC (rev 17459)
@@ -47,7 +47,7 @@
Term term = makeTerm(candidate, document);
FeatureMap fm = candidate.getFeatures();
if (fm.containsKey(inputScoreFeature)) {
- incrementTermFreq(term, 1);
+ Utilities.incrementScoreTermValue(scores, termFrequencyST, term, 1);
double score = ((Number) fm.get(inputScoreFeature)).doubleValue();
Utilities.addToMapSet(termDocuments, term, documentSource);
@@ -72,6 +72,7 @@
for (Term term : termIndividualScores.keySet()) {
score = MergingMode.calculate(mergingMode,
termIndividualScores.get(term));
Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, score);
+ Utilities.setScoreTermValue(scores, localDocFrequencyST, term,
termDocuments.size());
}
if (debugMode) {
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java	2014-02-26 20:51:10 UTC (rev 17458)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java	2014-02-26 21:09:57 UTC (rev 17459)
@@ -251,7 +251,7 @@
@Override
- public void saveAsCsv(double threshold, File file) throws GateException {
+ public void saveAsCsv(Number threshold, File file) throws GateException {
CsvGenerator.generateAndSaveCsv(this, threshold, file);
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java	2014-02-26 20:51:10 UTC (rev 17458)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java	2014-02-26 21:09:57 UTC (rev 17459)
@@ -16,7 +16,9 @@
import gate.*;
import gate.termraider.modes.*;
import gate.termraider.util.*;
+
import org.apache.commons.lang.StringEscapeUtils;
+
import java.util.*;
@@ -38,6 +40,7 @@
/* EXTRA DATA FOR ANALYSIS */
private Map<Term, Set<String>> termHeads;
private Map<Term, Set<String>> termHyponyms;
+ private ScoreType termFrequencyST, localDocFrequencyST;
/* Methods for the debugging GUI to get the data */
@@ -77,7 +80,7 @@
Utilities.addToMapSet(termDocuments, term, documentSource);
Utilities.addToMapSet(termHeads, term, head);
- incrementTermFreq(term, 1);
+ Utilities.incrementScoreTermValue(scores, termFrequencyST, term, 1);
}
}
@@ -126,11 +129,9 @@
rawTermScores.put(term, rawScore);
double score = Normalization.normalizeScore(rawScore);
termScores.put(term, score);
+ Utilities.setScoreTermValue(scores, localDocFrequencyST, term,
this.termDocuments.size());
}
- termsByDescendingScore = new ArrayList<Term>(termScores.keySet());
- Collections.sort(termsByDescendingScore, new
TermComparatorByDescendingScore(termScores));
-
if (debugMode) {
System.out.println("Termbank: nbr of terms = " +
termsByDescendingScore.size());
}
@@ -152,13 +153,10 @@
protected void initializeScoreTypes() {
this.scoreTypes = new ArrayList<ScoreType>();
this.scoreTypes.add(new ScoreType(scoreProperty));
- // TODO this TB needs a whole different kettle of fish
- //this.termFrequencyST = new ScoreType("termFrequency");
- //this.scoreTypes.add(termFrequencyST);
- //this.localDocFrequencyST = new ScoreType("localDocFrequency");
- //this.scoreTypes.add(localDocFrequencyST);
- //this.refDocFrequencyST = new ScoreType("refDocFrequency");
- //this.scoreTypes.add(refDocFrequencyST);
+ this.termFrequencyST = new ScoreType("termFrequency");
+ this.scoreTypes.add(termFrequencyST);
+ this.localDocFrequencyST = new ScoreType("localDocFrequency");
+ this.scoreTypes.add(localDocFrequencyST);
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java	2014-02-26 20:51:10 UTC (rev 17458)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java	2014-02-26 21:09:57 UTC (rev 17459)
@@ -24,7 +24,6 @@
public static void generateAndSaveCsv(AbstractTermbank bank,
Number threshold, File outputFile) throws GateException {
PrintWriter writer = initializeWriter(outputFile);
- Map<Term, Double> termScores = bank.getTermScores();
addComment(bank, "threshold = " + threshold);
List<Term> sortedTerms = bank.getTermsByDescendingScore();
@@ -33,7 +32,7 @@
writer.println(bank.getCsvHeader());
for (Term term : sortedTerms) {
- Double score = termScores.get(term);
+ Double score = bank.getScore(term);
if (score >= threshold.doubleValue()) {
writer.println(bank.getCsvLine(term));
written++;
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/PairCsvGenerator.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/PairCsvGenerator.java	2014-02-26 20:51:10 UTC (rev 17458)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/PairCsvGenerator.java	2014-02-26 21:09:57 UTC (rev 17459)
@@ -26,7 +26,7 @@
private String scorePropertyName;
public void generateAndSaveCsv(AbstractPairbank pairbank,
- double threshold, File outputFile) throws GateException {
+ Number threshold, File outputFile) throws GateException {
this.pairbank = pairbank;
this.debugMode = pairbank.getDebugMode();
this.scorePropertyName = pairbank.getScoreProperty();
@@ -41,7 +41,7 @@
}
- private void generateCsv(PrintWriter writer, double threshold) {
+ private void generateCsv(PrintWriter writer, Number threshold) {
Map<UnorderedTermPair, Double> scores = pairbank.getScores();
List<UnorderedTermPair> pairs = new
ArrayList<UnorderedTermPair>(scores.keySet());
Collections.sort(pairs, new TermPairComparatorByDescendingScore(scores));
@@ -51,7 +51,7 @@
writeHeader(writer);
for (UnorderedTermPair pair: pairs) {
double score = scores.get(pair);
- if (score < threshold) {
+ if (score < threshold.doubleValue()) {
break;
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Flow-based real-time traffic analytics software. Cisco certified tool.
Monitor traffic, SLAs, QoS, Medianet, WAAS etc. with NetFlow Analyzer
Customize your own dashboards, set traffic alerts and generate reports.
Network behavioral analysis & security monitoring. All-in-one tool.
http://pubads.g.doubleclick.net/gampad/clk?id=126839071&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs