Revision: 17534
http://sourceforge.net/p/gate/code/17534
Author: adamfunk
Date: 2014-03-04 18:51:39 +0000 (Tue, 04 Mar 2014)
Log Message:
-----------
Put the normalization back in the annotation termbank (aug.tf.idf
doesn't work properly if you augment the normalized values).
Modified Paths:
--------------
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
Modified:
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-03-04 18:13:52 UTC (rev 17533)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-03-04 18:51:39 UTC (rev 17534)
@@ -30,10 +30,11 @@
/* EXTRA CREOLE PARAMETERS */
protected String inputScoreFeature;
private MergingMode mergingMode;
+ private Normalization normalization;
/* EXTRA DATA FOR ANALYSIS */
private Map<Term, List<Double>> termIndividualScores;
- private ScoreType termFrequencyST, localDocFrequencyST;
+ private ScoreType rawScoreST, termFrequencyST, localDocFrequencyST;
protected void processDocument(Document document) {
@@ -69,10 +70,12 @@
languages.add(term.getLanguageCode());
types.add(term.getType());
- Double score = MergingMode.calculate(mergingMode,
termIndividualScores.get(term));
- Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, score);
+ Double rawScore = MergingMode.calculate(mergingMode,
termIndividualScores.get(term));
+ Utilities.setScoreTermValue(scores, rawScoreST, term, rawScore);
int localDF = termDocuments.get(term).size();
Utilities.setScoreTermValue(scores, localDocFrequencyST, term, localDF);
+ double normalized = Normalization.calculate(normalization, rawScore);
+ Utilities.setScoreTermValue(scores, getDefaultScoreType(), term,
normalized);
}
if (debugMode) {
@@ -96,6 +99,8 @@
protected void initializeScoreTypes() {
this.scoreTypes = new ArrayList<ScoreType>();
this.scoreTypes.add(new ScoreType(scoreProperty));
+ this.rawScoreST = new ScoreType(scoreProperty +
AbstractTermbank.RAW_SUFFIX);
+ this.scoreTypes.add(rawScoreST);
this.termFrequencyST = new ScoreType("termFrequency");
this.scoreTypes.add(termFrequencyST);
this.localDocFrequencyST = new ScoreType("localDocFrequency");
@@ -125,6 +130,16 @@
return this.mergingMode;
}
+ @CreoleParameter(comment = "score normalization",
+ defaultValue = "Sigmoid")
+ public void setNormalization(Normalization mode) {
+ this.normalization = mode;
+ }
+
+ public Normalization getNormalization() {
+ return this.normalization;
+ }
+
/* override default value from AbstractTermbank */
@CreoleParameter(defaultValue = "tfIdfAug")
public void setScoreProperty(String name) {
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Subversion Kills Productivity. Get off Subversion & Make the Move to Perforce.
With Perforce, you get hassle-free workflows. Merge that actually works.
Faster operations. Version large binaries. Built-in WAN optimization and the
freedom to use Git, Perforce or both. Make the move to Perforce.
http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
gate-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs