Revision: 17445
          http://sourceforge.net/p/gate/code/17445
Author:   adamfunk
Date:     2014-02-26 16:44:09 +0000 (Wed, 26 Feb 2014)
Log Message:
-----------
More juggling & decrufting

Modified Paths:
--------------
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/IdfCalculation.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/TfCalculation.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java

Added Paths:
-----------
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/Normalization.java

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java  2014-02-26 16:19:55 UTC (rev 17444)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java  2014-02-26 16:44:09 UTC (rev 17445)
@@ -205,32 +205,20 @@
 
   /* BEHOLD THE GUBBINS to distinguish the various types of Termbanks */
 
-  /**
-   * This method needs to call incrementTermFreq(...)!
-   */
+  protected abstract void resetScores();
+
   protected abstract void processDocument(Document document);
   
   protected abstract void calculateScores(); 
   
-  protected abstract void resetScores();
   
-
   
   
   protected int incrementTermFreq(Term term, int increment) {
-    return incrementMap(termFrequencies, term, increment);
+    return Utilities.incrementMap(termFrequencies, term, increment);
   }
   
   
-  protected int incrementMap(Map<Term, Integer> map, Term key, int increment) {
-    int count = 0;
-    if (map.containsKey(key)) {
-      count = map.get(key).intValue();
-    }
-    count += increment;
-    map.put(key, Integer.valueOf(count));
-    return count;
-  }
   
   
   public Double getScore(Term term) {

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java  2014-02-26 16:19:55 UTC (rev 17444)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java  2014-02-26 16:44:09 UTC (rev 17445)
@@ -81,7 +81,7 @@
         }
 
         rawTermScores.put(term, score);
-        termScores.put(term, Utilities.normalizeScore(score));
+        termScores.put(term, Normalization.normalizeScore(score));
       }
       
       termsByDescendingScore = new ArrayList<Term>(termScores.keySet());

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java  2014-02-26 16:19:55 UTC (rev 17444)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java  2014-02-26 16:44:09 UTC (rev 17445)
@@ -14,6 +14,7 @@
 import gate.creole.metadata.*;
 import gate.gui.ActionsPublisher;
 import gate.*;
+import gate.termraider.bank.modes.*;
 import gate.termraider.util.*;
 import org.apache.commons.lang.StringEscapeUtils;
 import java.util.*;
@@ -135,7 +136,7 @@
     for (Term term : terms) {
       double rawScore = calculateOneRawScore(term);
       rawTermScores.put(term, rawScore);
-      double score = Utilities.normalizeScore(rawScore);
+      double score = Normalization.normalizeScore(rawScore);
       termScores.put(term, score);
     }
     

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java  2014-02-26 16:19:55 UTC (rev 17444)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java  2014-02-26 16:44:09 UTC (rev 17445)
@@ -15,8 +15,7 @@
 import gate.creole.metadata.*;
 import gate.gui.ActionsPublisher;
 import gate.*;
-import gate.termraider.bank.modes.IdfCalculation;
-import gate.termraider.bank.modes.TfCalculation;
+import gate.termraider.bank.modes.*;
 import gate.termraider.util.*;
 import java.util.*;
 import org.apache.commons.lang.StringEscapeUtils;
@@ -87,7 +86,7 @@
       int n = docFreqSource.getTotalDocs();
       double score = TfCalculation.calculate(tfCalculation, tf) * IdfCalculation.calculate(idfCalculation, df, n);
       rawTermScores.put(term, Double.valueOf(score));
-      termScores.put(term, Utilities.normalizeScore(score));
+      termScores.put(term, Normalization.normalizeScore(score));
     }
     
     termsByDescendingScore = new ArrayList<Term>(termScores.keySet());

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/IdfCalculation.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/IdfCalculation.java  2014-02-26 16:19:55 UTC (rev 17444)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/IdfCalculation.java  2014-02-26 16:44:09 UTC (rev 17445)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2012, The University of Sheffield. See the file
+ *  Copyright (c) 2012--2014, The University of Sheffield. See the file
  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
  *
  *  This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -11,12 +11,15 @@
  */
 package gate.termraider.bank.modes;
 
+import gate.termraider.util.Utilities;
+
 public enum IdfCalculation {
-  Natural,
-  Logarithmic;
+  Logarithmic,
+  Scaled,
+  Natural;
   
-  /* These calculations are from Manning & Schütze, Foundations of
-   * Statistical NLP, section 15.2 (p.544).
+  /* These calculations are partly based on Manning & Schütze, 
+   * Foundations of Statistical NLP, section 15.2 (p.544).
    */
   
   public static double calculate(IdfCalculation mode, int rawDF, int corpusSize) {
@@ -24,25 +27,15 @@
     double n = (double) corpusSize;
     
     if (mode == Logarithmic) {
-      return 1.0 + logarithm(n / (df + 1.0));
+      return 1.0 + Utilities.log2(n / (df + 1.0));
     }
+
+    if (mode == Scaled) {
+      return (1.0 + n )/ (df + 1.0);
+    }
     
-    // TODO: review the df calculation modes; they must always return 
-    // something > 0.
-    
     // must be Natural
     return 1.0 / (df + 1.0);
   }
 
-  public static final double logBase = 2.0;
-  private static double conversion;
-  
-  static {
-    conversion = Math.log10(logBase);
-  }
-  
-  public static double logarithm(double input) {
-    return Math.log10(input) / conversion;
-  }
-  
 }

Added: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/Normalization.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/Normalization.java  (rev 0)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/Normalization.java  2014-02-26 16:44:09 UTC (rev 17445)
@@ -0,0 +1,37 @@
+package gate.termraider.bank.modes;
+
+public enum Normalization {
+  None,
+  Sigmoid;
+  
+  
+  private static double xScale = 4.8;
+
+  
+  public static double calculate(Normalization mode, Number raw) {
+    if (mode == None) {
+      return raw.doubleValue();
+    }
+    
+    // must be sigmoid
+    return normalizeScore(raw.doubleValue());
+  }
+  
+  
+  // TODO: make the following private and add normalization
+  // options to the termbanks (except DFB)
+  
+  /**
+   * The following produces the right half of a sigmoid 
+   * curve adjusted so that
+   * f(0) = 0; f(inf) = 100; f(x>0) > 0
+   * @param score from 0 to inf 
+   * @return score from 0 to 100
+   */
+  public static double normalizeScore(double score) {
+    double norm = 2.0 / (1.0 + Math.exp(-score / xScale)) - 1.0;
+    return (double) (100.0F * norm);
+  }
+
+  
+}


Property changes on: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/Normalization.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/TfCalculation.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/TfCalculation.java  2014-02-26 16:19:55 UTC (rev 17444)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/modes/TfCalculation.java  2014-02-26 16:44:09 UTC (rev 17445)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2012, The University of Sheffield. See the file
+ *  Copyright (c) 2012--2014, The University of Sheffield. See the file
  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
  *
  *  This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -11,8 +11,11 @@
  */
 package gate.termraider.bank.modes;
 
+import gate.termraider.util.Utilities;
+
 public enum TfCalculation {
   Natural,
+  Sqrt,
   Logarithmic;
   
   
@@ -20,9 +23,13 @@
     double tf = (double) rawTF;
     
     if (mode == Logarithmic) {
-      return 1.0 + IdfCalculation.logarithm(tf);
+      return 1.0 + Utilities.log2(tf);
     }
     
+    else if (mode == Sqrt) {
+      return Math.sqrt(tf);
+    }
+    
     // must be Natural
     return tf;
   }

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java  2014-02-26 16:19:55 UTC (rev 17444)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/util/Utilities.java  2014-02-26 16:44:09 UTC (rev 17445)
@@ -25,13 +25,23 @@
   public static final String EXTENSION_CSV = "csv";
 
   private static double log10of2;
-  private static double xScale = 4.8;
   
   static {
     log10of2 = Math.log10(2.0);
   }
 
+  
+  public static int incrementMap(Map<Term, Integer> map, Term key, int increment) {
+    int count = 0;
+    if (map.containsKey(key)) {
+      count = map.get(key).intValue();
+    }
+    count += increment;
+    map.put(key, Integer.valueOf(count));
+    return count;
+  }
 
+
   public static double meanDoubleList(List<Double> list) {
     if (list.isEmpty()) {
       return 0.0;
@@ -44,20 +54,7 @@
     return total / ((double) list.size());
   }
   
-  /**
-   * The following produces the right half of a sigmoid 
-   * curve adjusted so that
-   * f(0) = 0; f(inf) = 100; f(x>0) > 0
-   * @param score from 0 to inf 
-   * @return score from 0 to 100
-   */
-  public static double normalizeScore(double score) {
-    double norm = 2.0 / (1.0 + Math.exp(-score / xScale)) - 1.0;
-    return (double) (100.0F * norm);
-  }
 
-  
-
   public static Double convertToDouble(Object x) {
     if (x instanceof Number) {
       return ((Number) x).doubleValue();
@@ -131,10 +128,7 @@
     return url.toString();
   }
   
-  
-  
-  
-  
+
   public static File addExtensionIfNotExtended(File file, String extension) {
     String name = file.getName();
     if (name.contains(".")) {

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Flow-based real-time traffic analytics software. Cisco certified tool.
Monitor traffic, SLAs, QoS, Medianet, WAAS etc. with NetFlow Analyzer
Customize your own dashboards, set traffic alerts and generate reports.
Network behavioral analysis & security monitoring. All-in-one tool.
http://pubads.g.doubleclick.net/gampad/clk?id=126839071&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to