Revision: 17403
          http://sourceforge.net/p/gate/code/17403
Author:   adamfunk
Date:     2014-02-23 21:09:42 +0000 (Sun, 23 Feb 2014)
Log Message:
-----------
DF viewer works; CSV works...

Modified Paths:
--------------
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java    2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java    2014-02-23 21:09:42 UTC (rev 17403)
@@ -31,7 +31,6 @@
   private static final long serialVersionUID = 424942970862740181L;
 
   // CREOLE init parameters
-  protected boolean debugMode;
   protected String inputASName;
 
   protected transient List<Action> actionsList;
@@ -223,18 +222,7 @@
   public String getInputASName() {
     return this.inputASName;
   }
-  
-  
-  @CreoleParameter(comment = "print debugging information during initialization",
-          defaultValue = "false")
-  public void setDebugMode(Boolean debug) {
-    this.debugMode = debug;
-  }
 
-  public Boolean getDebugMode() {
-    return this.debugMode;
-  }
-
 }
 
 

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java    2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java    2014-02-23 21:09:42 UTC (rev 17403)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2008--2013, The University of Sheffield. See the file
+ *  Copyright (c) 2008--2014, The University of Sheffield. See the file
  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
  *
  *  This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -31,7 +31,6 @@
   private static final long serialVersionUID = -2809051430169834059L;
   
   // CREOLE init parameters
-  protected boolean debugMode;
   protected String inputASName;
   protected Set<String> inputAnnotationTypes;
 
@@ -226,8 +225,7 @@
   /* Methods for saving as CSV */
   
   public void saveAsCsv(double threshold, File outputFile) throws GateException {
-    CsvGenerator generator = new CsvGenerator();
-    generator.generateAndSaveCsv(this, threshold, outputFile);
+    CsvGenerator.generateAndSaveCsv(this, threshold, outputFile);
   }
 
   /**
@@ -236,9 +234,7 @@
    * @throws GateException
    */
   public void saveAsCsv(File outputFile) throws GateException {
-    double threshold = this.getMinScore();
-    CsvGenerator generator = new CsvGenerator();
-    generator.generateAndSaveCsv(this, threshold, outputFile);
+    saveAsCsv(this.getMinScore(), outputFile);
   }
   
   
@@ -293,16 +289,4 @@
     return this.inputAnnotationTypes;
   }
   
-  
-  @CreoleParameter(comment = "print debugging information during initialization",
-          defaultValue = "false")
-  public void setDebugMode(Boolean debug) {
-    this.debugMode = debug;
-  }
-
-  public Boolean getDebugMode() {
-    return this.debugMode;
-  }
-  
-
 }

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java    2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java    2014-02-23 21:09:42 UTC (rev 17403)
@@ -13,7 +13,9 @@
 
 import java.io.File;
 import java.util.*;
+
 import javax.swing.Action;
+
 import gate.Annotation;
 import gate.AnnotationSet;
 import gate.Corpus;
@@ -25,6 +27,7 @@
 import gate.creole.metadata.CreoleResource;
 import gate.gui.ActionsPublisher;
 import gate.termraider.gui.ActionSaveCsv;
+import gate.termraider.output.CsvGenerator;
 import gate.termraider.util.*;
 import gate.util.GateException;
 
@@ -230,11 +233,19 @@
   public int getMaxFrequency() {
     return this.maxFrequency;
   }
+  
+  
+  public List<Term> getTermsByDescendingFreq() {
+    List<Term> terms = new ArrayList<Term>(this.getTerms());
+    Comparator<Term> comparator = new TermComparatorByDescendingScore(documentFrequencies);
+    Collections.sort(terms, comparator);
+    return terms;
+  }
 
+  
   @Override
   public void saveAsCsv(double threshold, File file) throws GateException {
-    System.out.println("CSV output has not yet been implemented.");
-    // TODO Auto-generated method stub
+    CsvGenerator.generateAndSaveCsv(this, threshold, file);
   }
 
 

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java    2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java    2014-02-23 21:09:42 UTC (rev 17403)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2008--2012, The University of Sheffield. See the file
+ *  Copyright (c) 2008--2014, The University of Sheffield. See the file
  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
  *
  *  This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -19,8 +19,10 @@
 import gate.event.ProgressListener;
 import gate.termraider.bank.*;
 import gate.termraider.util.*;
+
 import java.awt.BorderLayout;
 import java.util.*;
+
 import javax.swing.JPanel;
 import javax.swing.JScrollPane;
 import javax.swing.JSplitPane;
@@ -49,6 +51,7 @@
   private ListTableModel typeTableModel, langTableModel;
   private JTextField docsField;
   
+  
   @Override
   public Resource init() {
     initGuiComponents();
@@ -74,29 +77,23 @@
     dfTab.add(freqScrollPane, BorderLayout.CENTER);
     
     JSplitPane listsTab = new JSplitPane(JSplitPane.HORIZONTAL_SPLIT);
-    typeTableModel = new ListTableModel("Term annotation types");
+    typeTableModel = new ListTableModel("Annotation types indexed");
     typeTable = new JTable(typeTableModel);
-    langTableModel = new ListTableModel("Language codes");
+    typeTable.setAutoCreateRowSorter(true);
+    JScrollPane typeScrollPane = new JScrollPane(typeTable, 
+            JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED, 
+            JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
+    
+    langTableModel = new ListTableModel("Language codes indexed");
     langTable = new JTable(langTableModel);
-    listsTab.setLeftComponent(typeTable);
-    listsTab.setRightComponent(langTable);
-    tabbedPane.addTab("Types and languages", listsTab);
-    
-    // TODO
-    // wrap each table in a pane with optional scrolling
-    /*
-         termTable.setAutoCreateRowSorter(true);
-    pairTable.setAutoCreateRowSorter(true);
-    termPane = new JScrollPane(termTable, 
-    JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED, 
-            JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
-    pairPane = new JScrollPane(pairTable, 
+    langTable.setAutoCreateRowSorter(true);
+    JScrollPane langScrollPane = new JScrollPane(langTable, 
             JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED, 
             JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
-    splitPane.setLeftComponent(termPane);
-    splitPane.setRightComponent(pairPane);
 
-     */
+    listsTab.setLeftComponent(typeScrollPane);
+    listsTab.setRightComponent(langScrollPane);
+    tabbedPane.addTab("Types and languages", listsTab);
     
     this.add(tabbedPane, BorderLayout.CENTER);
     tabbedPane.validate();
@@ -189,6 +186,7 @@
 
   public ListTableModel(String heading) {
     this.heading = heading;
+    this.strings = new ArrayList<String>();
   }
   
   public void setList(Collection<String> strings) {

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java    2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java    2014-02-23 21:09:42 UTC (rev 17403)
@@ -28,7 +28,9 @@
   
   
   
-  
+  // TODO
+  // Add another constructor for DocumentFrequencyBank, with more
+  // suitable slider range calculations
   public SliderPanel(AbstractBank scoredbank, String verb, boolean startLeft,
           TermbankViewer viewer) {
     this.scoredbank = scoredbank;

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java    2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java    2014-02-23 21:09:42 UTC (rev 17403)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010--2012, The University of Sheffield. See the file
+ *  Copyright (c) 2010--2014, The University of Sheffield. See the file
  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
  *
  *  This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -12,71 +12,99 @@
 package gate.termraider.output;
 
 import gate.util.GateException;
+
 import java.io.*;
 import java.util.*;
+
 import org.apache.commons.lang.*;
+
 import gate.termraider.bank.*;
 import gate.termraider.util.*;
 
 public class CsvGenerator {
   
-  private AbstractTermbank termbank;
-  private boolean debugMode;
-  private String scorePropertyName;
-  
-  public void generateAndSaveCsv(AbstractTermbank termbank, 
-          double threshold, File outputFile) throws GateException {
-    this.termbank = termbank;
-    this.debugMode = termbank.getDebugMode();
-    this.scorePropertyName = termbank.getScoreProperty();
+  public static void generateAndSaveCsv(AbstractBank bank, 
+          Number threshold, File outputFile) throws GateException {
     PrintWriter writer = initializeWriter(outputFile);
-    generateCsv(writer, threshold);
+    
+    if (bank instanceof AbstractTermbank) {
+      String scorePropertyName = bank.getScoreProperty();
+      generateTermbankCsv((AbstractTermbank) bank, writer, threshold.doubleValue(), scorePropertyName);
+    }
+    else if (bank instanceof DocumentFrequencyBank) {
+      generateDFCsv((DocumentFrequencyBank) bank, writer, threshold.intValue());
+    }
+    
     writer.flush();
     writer.close();
-    if (debugMode) {
-      System.out.println("Termbank: saved CSV in " + outputFile.getAbsolutePath());
+    if (bank.getDebugMode()) {
+      System.out.println("Saved CSV to " + outputFile.getAbsolutePath() +
+              " from " + bank.getName() + " (" + bank.getClass().getName() + ")");
     }
-
   }
   
   
-  
-  private void generateCsv(PrintWriter writer, double threshold) {
-    Map<Term, Double> termScores = termbank.getTermScores();
-    Map<Term, Set<String>> termDocuments = termbank.getTermDocuments();
+  private static void generateTermbankCsv(AbstractTermbank bank, PrintWriter writer, 
+          double threshold, String scorePropertyName) {
+    Map<Term, Double> termScores = bank.getTermScores();
+    Map<Term, Set<String>> termDocuments = bank.getTermDocuments();
     Map<Term, Integer> termFrequencies = null;
-    termFrequencies = termbank.getTermFrequencies();
-    addComment("threshold = " + threshold);
-    List<Term> sortedTerms = termbank.getTermsByDescendingScore();
+    termFrequencies = bank.getTermFrequencies();
+    addComment(bank, "threshold = " + threshold);
+    List<Term> sortedTerms = bank.getTermsByDescendingScore();
     
-    addComment("Unfiltered nbr of terms = " + sortedTerms.size());
+    addComment(bank, "Unfiltered nbr of terms = " + sortedTerms.size());
     int written = 0;
-    writeHeader(writer);
+    writeTermbankHeader(writer);
     
     for (Term term : sortedTerms) {
       Double score = termScores.get(term);
       if (score >= threshold) {
         Set<String> documents = termDocuments.get(term);
         Integer frequency = termFrequencies.get(term);
-        writeContent(writer, term, score, documents, frequency);
+        writeTermBankContent(writer, term, score, documents, frequency, scorePropertyName);
         written++;
       }
       else {  // the rest must be lower
         break;
       }
     }
-    addComment("Filtered nbr of terms = " + written);
+    addComment(bank, "Filtered nbr of terms = " + written);
   }
+
   
+  private static void generateDFCsv(DocumentFrequencyBank bank, PrintWriter writer, int threshold) {
+    Map<Term, Integer> frequencies = bank.getDocFrequencies();
+    addComment(bank, "threshold = " + threshold);
+    List<Term> sortedTerms = bank.getTermsByDescendingFreq();
+    
+    addComment(bank, "Unfiltered nbr of terms = " + sortedTerms.size());
+    int written = 0;
+    writeDFHeader(writer);
+    writeDFContent(writer, "_TOTAL_DOCS_", bank.getTotalDocs());
+    
+    for (Term term : sortedTerms) {
+      Integer freq = frequencies.get(term);
+      if (freq >= threshold) {
+        writeDFContent(writer, term, freq);
+        written++;
+      }
+      else {  // the rest must be lower
+        break;
+      }
+    }
+    addComment(bank, "Filtered nbr of terms = " + written);
+  }
+
   
-  private void addComment(String commentStr) {
-    if (debugMode) {
-      System.err.println(commentStr);
+  private static void addComment(AbstractBank termbank, String commentStr) {
+    if (termbank.getDebugMode()) {
+      System.out.println(commentStr);
     }
   }
   
   
-  private PrintWriter initializeWriter(File outputFile) throws GateException {
+  private static PrintWriter initializeWriter(File outputFile) throws GateException {
     try {
       return new PrintWriter(outputFile);
     } 
@@ -86,8 +114,8 @@
   }
   
   
-  
-  private void writeContent(PrintWriter writer, Term term, Double score, Set<String> documents, Integer frequency) {
+  private static void writeTermBankContent(PrintWriter writer, Term term, Double score,
+          Set<String> documents, Integer frequency, String scorePropertyName) {
     StringBuilder sb = new StringBuilder();
     sb.append(StringEscapeUtils.escapeCsv(term.getTermString()));
     sb.append(',');
@@ -95,16 +123,18 @@
     sb.append(',');
     sb.append(StringEscapeUtils.escapeCsv(term.getType()));
     sb.append(',');
-    sb.append(StringEscapeUtils.escapeCsv(this.scorePropertyName));
+    sb.append(StringEscapeUtils.escapeCsv(scorePropertyName));
     sb.append(',');
     sb.append(StringEscapeUtils.escapeCsv(score.toString()));
     sb.append(',');
     sb.append(StringEscapeUtils.escapeCsv(Integer.toString(documents.size())));
-    sb.append(',').append(StringEscapeUtils.escapeCsv(frequency.toString()));
+    sb.append(',');
+    sb.append(StringEscapeUtils.escapeCsv(frequency.toString()));
     writer.println(sb.toString());
   }
   
-  private void writeHeader(PrintWriter writer) {
+  
+  private static void writeTermbankHeader(PrintWriter writer) {
     StringBuilder sb = new StringBuilder();
     sb.append(StringEscapeUtils.escapeCsv("Term"));
     sb.append(',').append(StringEscapeUtils.escapeCsv("Lang"));
@@ -115,6 +145,42 @@
     sb.append(',').append(StringEscapeUtils.escapeCsv("Term_Frequency"));
     writer.println(sb.toString());
   }
+
+
+  private static void writeDFContent(PrintWriter writer, Term term, Integer frequency) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(StringEscapeUtils.escapeCsv(term.getTermString()));
+    sb.append(',');
+    sb.append(StringEscapeUtils.escapeCsv(term.getLanguageCode()));
+    sb.append(',');
+    sb.append(StringEscapeUtils.escapeCsv(term.getType()));
+    sb.append(',');
+    sb.append(StringEscapeUtils.escapeCsv(frequency.toString()));
+    writer.println(sb.toString());
+  }
+
   
+
+  private static void writeDFContent(PrintWriter writer, String string, Integer frequency) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(StringEscapeUtils.escapeCsv(string));
+    sb.append(',');
+    sb.append(StringEscapeUtils.escapeCsv(""));
+    sb.append(',');
+    sb.append(StringEscapeUtils.escapeCsv(""));
+    sb.append(',');
+    sb.append(StringEscapeUtils.escapeCsv(frequency.toString()));
+    writer.println(sb.toString());
+  }
+
+
+  private static void writeDFHeader(PrintWriter writer) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(StringEscapeUtils.escapeCsv("Term"));
+    sb.append(',').append(StringEscapeUtils.escapeCsv("Lang"));
+    sb.append(',').append(StringEscapeUtils.escapeCsv("Type"));
+    sb.append(',').append(StringEscapeUtils.escapeCsv("DocFrequency"));
+    writer.println(sb.toString());
+  }
   
 }

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java    2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java    2014-02-23 21:09:42 UTC (rev 17403)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010--2012, The University of Sheffield. See the file
+ *  Copyright (c) 2010--2014, The University of Sheffield. See the file
  *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
  *
  *  This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -75,6 +75,7 @@
   protected String languageFeature;
   protected String inputAnnotationFeature;
   protected Set<Corpus> corpora;
+  protected boolean debugMode;
 
 
 
@@ -118,5 +119,15 @@
   public Set<Corpus> getCorpora() {
     return this.corpora;
   }
+
+  @CreoleParameter(comment = "print debugging information during 
initialization",
+          defaultValue = "false")
+  public void setDebugMode(Boolean debug) {
+    this.debugMode = debug;
+  }
+
+  public Boolean getDebugMode() {
+    return this.debugMode;
+  }
   
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to