Revision: 17461
http://sourceforge.net/p/gate/code/17461
Author: adamfunk
Date: 2014-02-26 21:54:45 +0000 (Wed, 26 Feb 2014)
Log Message:
-----------
Halfway to slaying the old beast.
Modified Paths:
--------------
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/HyponymyDebugger.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -42,13 +42,13 @@
protected Map<ScoreType, Map<Term, Number>> scores;
protected Map<Term, Set<String>> termDocuments;
+ public static final String RAW_SUFFIX = ".raw";
protected Map<Term, Double> termScores;
- protected Map<Term, Double> rawTermScores;
protected List<Term> termsByDescendingScore;
protected Map<Term, Integer> termFrequencies, docFrequencies;
protected boolean descendingScoresDone = false;
-
+ // TODO delete when FrequencyTableModel is superseded
public static final String freqProperty = "frequency";
protected List<ScoreType> scoreTypes;
@@ -136,16 +136,19 @@
return new HashSet<String>();
}
+ // TODO delete when FrequencyTableModel is superseded
@Deprecated
public Map<Term, Integer> getTermFrequencies() {
return this.termFrequencies;
}
+ // TODO delete when FrequencyTableModel is superseded
@Deprecated
public Map<Term, Integer> getDocFrequencies() {
return this.docFrequencies;
}
+ // TODO delete when FrequencyTableModel is superseded
@Deprecated
public String getFreqProperty() {
return freqProperty;
@@ -251,18 +254,6 @@
}
- @Deprecated
- public Double getRawScore(Term term) {
- if (rawTermScores.containsKey(term)) {
- return rawTermScores.get(term).doubleValue();
- }
-
- // error code
- return null;
- }
-
-
-
/* Methods for saving as CSV */
public void saveAsCsv(Number threshold, File outputFile) throws
GateException {
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -16,13 +16,9 @@
import gate.*;
import gate.termraider.util.*;
import gate.termraider.modes.*;
-
import java.util.*;
-import org.apache.commons.lang.StringEscapeUtils;
-
-
@CreoleResource(name = "AnnotationTermbank",
icon = "termbank-lr.png",
comment = "TermRaider Termbank derived from document annotations")
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -16,9 +16,6 @@
import gate.*;
import gate.termraider.modes.*;
import gate.termraider.util.*;
-
-import org.apache.commons.lang.StringEscapeUtils;
-
import java.util.*;
@@ -40,7 +37,7 @@
/* EXTRA DATA FOR ANALYSIS */
private Map<Term, Set<String>> termHeads;
private Map<Term, Set<String>> termHyponyms;
- private ScoreType termFrequencyST, localDocFrequencyST;
+ private ScoreType termFrequencyST, localDocFrequencyST, rawScoreST;
/* Methods for the debugging GUI to get the data */
@@ -126,9 +123,8 @@
for (Term term : terms) {
double rawScore = calculateOneRawScore(term);
- rawTermScores.put(term, rawScore);
double score = Normalization.normalizeScore(rawScore);
- termScores.put(term, score);
+ Utilities.setScoreTermValue(scores, rawScoreST, term, rawScore);
Utilities.setScoreTermValue(scores, localDocFrequencyST, term,
this.termDocuments.size());
}
@@ -143,7 +139,6 @@
termHyponyms = new HashMap<Term, Set<String>>();
termDocuments = new HashMap<Term, Set<String>>();
termScores = new HashMap<Term, Double>();
- rawTermScores = new HashMap<Term, Double>();
termsByDescendingScore = new ArrayList<Term>();
termFrequencies = new HashMap<Term, Integer>();
docFrequencies = new HashMap<Term, Integer>();
@@ -153,6 +148,7 @@
protected void initializeScoreTypes() {
this.scoreTypes = new ArrayList<ScoreType>();
this.scoreTypes.add(new ScoreType(scoreProperty));
+
this.termFrequencyST = new ScoreType("termFrequency");
this.scoreTypes.add(termFrequencyST);
this.localDocFrequencyST = new ScoreType("localDocFrequency");
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -17,13 +17,10 @@
import gate.*;
import gate.termraider.modes.*;
import gate.termraider.util.*;
-
import java.util.*;
-import org.apache.commons.lang.StringEscapeUtils;
-
@CreoleResource(name = "TfIdfTermbank",
icon = "termbank-lr.png",
comment = "TermRaider Termbank derived from vectors in document features")
@@ -39,7 +36,7 @@
private DocumentFrequencyBank docFreqSource;
/* EXTRA DATA */
- private ScoreType termFrequencyST, localDocFrequencyST, refDocFrequencyST;
+ private ScoreType rawScoreST, termFrequencyST, localDocFrequencyST, refDocFrequencyST;
protected void processDocument(Document document) {
@@ -49,7 +46,7 @@
for (Annotation candidate : candidates) {
Term term = makeTerm(candidate, document);
- incrementTermFreq(term, 1);
+ Utilities.incrementScoreTermValue(scores, termFrequencyST, term, 1);
Utilities.addToMapSet(termDocuments, term, documentSource);
}
@@ -59,6 +56,8 @@
protected void initializeScoreTypes() {
this.scoreTypes = new ArrayList<ScoreType>();
this.scoreTypes.add(new ScoreType(scoreProperty));
+ this.rawScoreST = new ScoreType(scoreProperty + AbstractTermbank.RAW_SUFFIX);
+ this.scoreTypes.add(rawScoreST);
this.termFrequencyST = new ScoreType("termFrequency");
this.scoreTypes.add(termFrequencyST);
this.localDocFrequencyST = new ScoreType("localDocFrequency");
@@ -79,15 +78,13 @@
int df = getRefDocFrequency(term);
int n = docFreqSource.getDocumentCount();
double score = TfCalculation.calculate(tfCalculation, tf) *
IdfCalculation.calculate(idfCalculation, df, n);
- rawTermScores.put(term, Double.valueOf(score));
- termScores.put(term, Normalization.normalizeScore(score));
+ Utilities.setScoreTermValue(scores, rawScoreST, term, score);
+ double normalized = Normalization.normalizeScore(score);
+ Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, normalized);
}
-
- termsByDescendingScore = new ArrayList<Term>(termScores.keySet());
- Collections.sort(termsByDescendingScore, new TermComparatorByDescendingScore(termScores));
-
+
if (debugMode) {
- System.out.println("Termbank: nbr of terms = " + termsByDescendingScore.size());
+ System.out.println("Termbank: nbr of terms = " + this.getTerms().size());
}
}
@@ -95,8 +92,6 @@
protected void resetScores() {
termDocuments = new HashMap<Term, Set<String>>();
termScores = new HashMap<Term, Double>();
- rawTermScores = new HashMap<Term, Double>();
- termsByDescendingScore = new ArrayList<Term>();
termFrequencies = new HashMap<Term, Integer>();
docFrequencies = new HashMap<Term, Integer>();
documentCount = 0;
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/HyponymyDebugger.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/HyponymyDebugger.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/HyponymyDebugger.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -80,7 +80,7 @@
class HDTableModel extends AbstractTableModel {
private static final long serialVersionUID = -1124137938074923640L;
- private String[] columnNames = {"term", "raw score", "docs", "docs", "hyponyms", "hyponyms", "heads"};
+ private String[] columnNames = {"term", "score", "docs", "docs", "hyponyms", "hyponyms", "heads"};
private Map<Term, Set<String>> termDocuments, termHyponyms, termHeads;
private List<Term> terms;
private HyponymyTermbank termbank;
@@ -126,7 +126,7 @@
result = term.toString();
break;
case 1:
- result = Double.toString(termbank.getRawScore(term));
+ result = Double.toString(termbank.getScore(term));
break;
case 2:
result = Integer.toString(termDocuments.get(term).size());
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -29,8 +29,8 @@
}
- // TODO: make the following private and add normalization
- // options to the termbanks (except DFB)
+ // TODO: make the following private, remove deprecation,
+ // and add normalization options to the termbanks (except DFB)
/**
* The following produces the right half of a sigmoid
@@ -39,6 +39,7 @@
* @param score from 0 to inf
* @return score from 0 to 100
*/
+ @Deprecated
public static double normalizeScore(double score) {
double norm = 2.0 / (1.0 + Math.exp(-score / xScale)) - 1.0;
return (double) (100.0F * norm);
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Flow-based real-time traffic analytics software. Cisco certified tool.
Monitor traffic, SLAs, QoS, Medianet, WAAS etc. with NetFlow Analyzer
Customize your own dashboards, set traffic alerts and generate reports.
Network behavioral analysis & security monitoring. All-in-one tool.
http://pubads.g.doubleclick.net/gampad/clk?id=126839071&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs