Revision: 17462
          http://sourceforge.net/p/gate/code/17462
Author:   adamfunk
Date:     2014-02-26 22:12:56 +0000 (Wed, 26 Feb 2014)
Log Message:
-----------
This plugin needs some theology & geometry, some taste & decency.

Modified Paths:
--------------
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
      2014-02-26 21:54:45 UTC (rev 17461)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
      2014-02-26 22:12:56 UTC (rev 17462)
@@ -44,10 +44,14 @@
   protected Map<Term, Set<String>>  termDocuments;
   public static final String RAW_SUFFIX = ".raw";
   
+  // TODO get rid of this in favour of the default key of scores
+  @Deprecated
   protected Map<Term, Double>       termScores;
+  
   protected List<Term>              termsByDescendingScore;
   protected Map<Term, Integer>      termFrequencies, docFrequencies;
   protected boolean                 descendingScoresDone = false;
+  
   // TODO delete when FrequencyTableModel is superseded
   public static final String freqProperty = "frequency";
 
@@ -235,15 +239,8 @@
   protected abstract void calculateScores(); 
   
   
-  
+  // TODO: change to use getMainScores()
   @Deprecated
-  protected int incrementTermFreq(Term term, int increment) {
-    return Utilities.incrementMap(termFrequencies, term, increment);
-  }
-  
-  
-  
-  // TODO: change to use getMainScores() 
   public Double getScore(Term term) {
     if (termScores.containsKey(term)) {
       return termScores.get(term).doubleValue();

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
 2014-02-26 21:54:45 UTC (rev 17461)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
 2014-02-26 22:12:56 UTC (rev 17462)
@@ -79,7 +79,6 @@
   protected void resetScores() {
     documentCount = 0;
     documentFrequencies = new HashMap<Term, Integer>();
-    termFrequencies = new HashMap<Term, Integer>();
     languages = new HashSet<String>();
     types = new HashSet<String>();
     stringLookupTable = new HashMap<String, Set<Term>>();

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
      2014-02-26 21:54:45 UTC (rev 17461)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
      2014-02-26 22:12:56 UTC (rev 17462)
@@ -16,6 +16,7 @@
 import gate.*;
 import gate.termraider.modes.*;
 import gate.termraider.util.*;
+
 import java.util.*;
 
 
@@ -32,6 +33,7 @@
   
   /* EXTRA CREOLE PARAMETERS */
   protected List<String> inputHeadFeatures;
+  private Normalization normalization;
 
   
   /* EXTRA DATA FOR ANALYSIS */
@@ -52,8 +54,8 @@
   
   
   private double calculateOneRawScore(Term term) {
-    double docFreq = (double) getSetFromMap(termDocuments, term).size();
-    double hyponyms = (double) getSetFromMap(termHyponyms, term).size();
+    double docFreq = (double) Utilities.getStringSetFromMap(termDocuments, 
term).size();
+    double hyponyms = (double) Utilities.getStringSetFromMap(termHyponyms, 
term).size();
     return docFreq * (1.0F + hyponyms);
   }
 
@@ -83,19 +85,6 @@
 
   
   
-  private Set<String> getSetFromMap(Map<Term, Set<String>> map, Term key) {
-    if (map.containsKey(key)) {
-      return map.get(key);
-    }
-    
-    //implied else
-    Set<String> valueSet = new HashSet<String>();
-    map.put(key, valueSet);
-    return valueSet;
-  }
-  
-  
-
   public void calculateScores() {
     Set<Term> terms = termHeads.keySet();
     Set<String> headsI, headsJ;
@@ -123,8 +112,9 @@
     
     for (Term term : terms) {
       double rawScore = calculateOneRawScore(term);
-      double score = Normalization.normalizeScore(rawScore);
+      double normalized = Normalization.calculate(normalization, rawScore);
       Utilities.setScoreTermValue(scores, rawScoreST, term, rawScore);
+      Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, 
normalized);
       Utilities.setScoreTermValue(scores, localDocFrequencyST, term, 
this.termDocuments.size());
     }
     
@@ -138,7 +128,6 @@
     termHeads       = new HashMap<Term, Set<String>>();
     termHyponyms    = new HashMap<Term, Set<String>>();
     termDocuments   = new HashMap<Term, Set<String>>();
-    termScores      = new HashMap<Term, Double>();
     termsByDescendingScore     = new ArrayList<Term>();
     termFrequencies = new HashMap<Term, Integer>();
     docFrequencies = new HashMap<Term, Integer>();
@@ -175,5 +164,18 @@
     super.setScoreProperty(name);
   }
 
+  
+  @CreoleParameter(comment = "score normalization",
+          defaultValue = "Sigmoid")
+  public void setNormalization(Normalization mode) {
+    this.normalization = mode;
+  }
+  
+  public Normalization getNormalization() {
+    return this.normalization;
+  }
+  
 
+
+
 }

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
 2014-02-26 21:54:45 UTC (rev 17461)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
 2014-02-26 22:12:56 UTC (rev 17462)
@@ -33,6 +33,7 @@
   /* EXTRA CREOLE PARAMETERS */
   private TfCalculation tfCalculation;
   private IdfCalculation idfCalculation;
+  private Normalization normalization;
   private DocumentFrequencyBank docFreqSource;
   
   /* EXTRA DATA */
@@ -77,9 +78,9 @@
       int tf = termFrequencies.get(term);
       int df = getRefDocFrequency(term);
       int n = docFreqSource.getDocumentCount();
-      double score = TfCalculation.calculate(tfCalculation, tf) * 
IdfCalculation.calculate(idfCalculation, df, n);
-      Utilities.setScoreTermValue(scores, rawScoreST, term, score);
-      double normalized = Normalization.normalizeScore(score);
+      double rawScore = TfCalculation.calculate(tfCalculation, tf) * 
IdfCalculation.calculate(idfCalculation, df, n);
+      Utilities.setScoreTermValue(scores, rawScoreST, term, rawScore);
+      double normalized = Normalization.calculate(normalization, rawScore);
       Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, 
normalized);
     }
 
@@ -89,11 +90,11 @@
   }
   
   
+  // TODO termFrequency incrementation may have been lost in the refactoring
+  
   protected void resetScores() {
     termDocuments    = new HashMap<Term, Set<String>>();
-    termScores       = new HashMap<Term, Double>();
     termFrequencies = new HashMap<Term, Integer>();
-    docFrequencies = new HashMap<Term, Integer>();
     documentCount = 0;
   }
 
@@ -113,6 +114,16 @@
     return this.docFreqSource;
   }
   
+  @CreoleParameter(comment = "score normalization",
+          defaultValue = "Sigmoid")
+  public void setNormalization(Normalization mode) {
+    this.normalization = mode;
+  }
+  
+  public Normalization getNormalization() {
+    return this.normalization;
+  }
+  
 
   @CreoleParameter(comment = "term frequency calculation",
           defaultValue = "Logarithmic")

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
        2014-02-26 21:54:45 UTC (rev 17461)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
        2014-02-26 22:12:56 UTC (rev 17462)
@@ -13,6 +13,7 @@
 
 public enum Normalization {
   None,
+  Hundred,
   Sigmoid;
   
   
@@ -24,6 +25,10 @@
       return raw.doubleValue();
     }
     
+    if (mode == Hundred) {
+      return 100.0 * raw.doubleValue();
+    }
+    
     // must be sigmoid
     return normalizeScore(raw.doubleValue());
   }
@@ -44,6 +49,10 @@
     double norm = 2.0 / (1.0 + Math.exp(-score / xScale)) - 1.0;
     return (double) (100.0F * norm);
   }
+  
+  /* Note: Normalization mode does not apply to the AnnotationTermbank, since it
+   * is derived from (presumably) already normalized tf.idf values.
+   */
 
   
 }

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java
     2014-02-26 21:54:45 UTC (rev 17461)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java
     2014-02-26 22:12:56 UTC (rev 17462)
@@ -221,7 +221,17 @@
     submap.put(term, count);
     map.put(type, submap);
   }
+
   
+  public static Set<String> getStringSetFromMap(Map<Term, Set<String>> map, 
Term key) {
+    if (map.containsKey(key)) {
+      return map.get(key);
+    }
+    
+    //implied else
+    Set<String> valueSet = new HashSet<String>();
+    map.put(key, valueSet);
+    return valueSet;
+  }
 
-
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Flow-based real-time traffic analytics software. Cisco certified tool.
Monitor traffic, SLAs, QoS, Medianet, WAAS etc. with NetFlow Analyzer
Customize your own dashboards, set traffic alerts and generate reports.
Network behavioral analysis & security monitoring. All-in-one tool.
http://pubads.g.doubleclick.net/gampad/clk?id=126839071&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to